Skip to content
Snippets Groups Projects
Commit f46b75f9 authored by Heloise Chevalier's avatar Heloise Chevalier
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
Pipeline #28512 canceled
.Rhistory 0 → 100644
plot(reg)
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
setwd("~/A18/SY19/TP02")
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
plot(reg)
plot(reg) #tous les plots
summary(reg)
reg4 <- lm(lpsa ~lcp + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
summary(reg)
reg4 <- lm(lpsa ~ age + lcp + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + gleason, data = prostate)
summary(reg4)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + lcp + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
summary(reg)
# Fixed predictor-name typo: gleason (not "glesaon").
reg4 <- lm(lpsa ~ age + lbph + lcp + gleason + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + lbph + gleason + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ . -train -lcavol, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
cor(prostate)
reg3 <- lm(lpsa ~ . - train - lcavol - lbph,data = prostate)
summary(reg3)
summary(reg)
reg3 <- lm(lpsa ~ . - train -lbph,data = prostate)
summary(reg3)
summary(reg)
plot(prostate$lcavol,reg$residuals)
plot(prostate$lweight,reg$residuals)
plot(prostate$lage,reg$residuals)
plot(prostate$age,reg$residuals)
plot(prostate$lpsa,reg$residuals)
reg5 <- lm(sqrt(lpsa) ~ .-train, data = prostate)
sqrt(4)
sqrt(prostate$lpsa)
prostate$lpsa
reg5 <- lm(sqrt(abs(lpsa)) ~ .-train, data = prostate)
summary(reg5)
plot(prostate$lpsa,reg5$fitted.values)
abline(0,1)
plot(prostate$lpsa,reg5$fitted.values)
abs(prostate$lpsa)
plot(prostate$lcavol,reg$residuals)
plot(prostate$lweight,reg$residuals)
plot(prostate$age,reg$residuals)
appr = prostate[prostate$train,1:4]
test = prostate[!prostate$train,]
appr = prostate[prostate$train,]
appr
test
appr = prostate[prostate$train,1:9]
appr
test = prostate[!prostate$train,1:9]
?predict
pred <- lm(lpsa ~ ., data = appr)
predict(pred,newdata = test)
pred$fitted.values
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
setwd("~/A18/SY19/TP02")
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
#estimation de tous les coefficients
#intercept = ordonnée à l'origine
summary(reg)
#pour chaque coefficient on retrouve les estimés (beta chapeau)
#Std error = sigma (espérance des beta chapeau)
#avant dernière colonne = stat de Student
#dernière colonne = P value (plus elle est petite plus on rejette H0)
#lcavol, lweight, svi rejettés
# plus il y a d'étoiles plus c'est significatif
#F-stat = tester si la régression est significative -> p-value faible ici
#(tous les prédicteurs ne sont pas significatifs)
#intervalle de confiance
confint(reg, level = 0.95)
x11()
plot(prostate$lpsa,reg$fitted.values)
abline(0,1)
x11()
plot(prostate$lpsa,reg$residuals)
plot(prostate$lpsa,rstandard(reg)) #vérifier qu'il n'y a pas un point qui a un résidu énorme
# (point aberrant)
#test de Shapiro pour la normalité
shapiro.test(reg$residuals)
#étude de la stabilité de la régression
# distance de Cook
cooks.distance(reg)
pred <- lm(lpsa ~ ., data = appr)
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
appr = prostate[prostate$train,1:9]
test = prostate[!prostate$train,1:9]
pred <- lm(lpsa ~ ., data = appr)
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
confint(pred)
#### Classification Linéaire #
spam = read.table("spambase.dat")
spam
?sample
dim(spam)
dim(spam)[1]
?sample
?trunc
# Train/test split for the spam data: draw 2/3 of the row indices for the
# training set and take the test set with negative indexing.
# NOTE(review): `!selection` or arithmetic on index vectors does NOT give the
# complement of an integer index; `-selection` is the correct idiom.
n <- dim(spam)[1]
selection <- sort(sample(x = 1:n, size = trunc(n * 2/3)))
appr <- spam[selection, ]
test <- spam[-selection, ]
?lda
??lda
package(MASS)
install.packages("MASS")
library(MASS)
?lda
test = spam[-appr,]
test = spam[-selection,]
test
spam = read.table("spambase.dat")
n = dim(spam)[1]
selection = sort(sample(x = 1:n, size = trunc(n*2/3)))
spam.appr = spam[selection,]
spam.test = spam[-selection,]
# Fit LDA on the training set. The response column is named V58 (capital V,
# as produced by read.table on a headerless file), and the model must be
# assigned with lda(), not called as spam.lda(...).
spam.lda <- lda(V58 ~ ., data = spam.appr)
spam.lda
spam.pred <- predict(spam.lda, newdata = spam.test)
spam.pred
table(spam.test$V58, spam.pred$V58)
table(spam.test$V58, spam.pred$class)
spam.pred$class == spam.test$V58
library(pROC)
install.packages("pROC")
library(pROC)
library(pROC)
spam.pred$x
roc_curve <- roc(spam.test$V58, as.vector(spam.pred$x))
plot(roc_curve)
View(spam.pred)
roc_curve <- roc(spam.test$V58, spam.pred$class)
# Logistic regression on the training set (missing comma before `data` fixed).
spam.glm <- glm(V58 ~ ., data = spam.appr, family = binomial)
summary(spam.glm)
spam.glm.pred <- predict(spam.glm, newdata = spam.test, type ="response")
table(spam.test$V58,spam.glm.pred>0.5)
table(spam.test$V58, spam.pred$class)
logit <- predict(spam.glm, newdata = spam.test, type = "link")
plot(roc_glm, add = TRUE, col = 'red')
roc_glm = roc(spam.test$V58,logit)
plot(roc_glm, add = TRUE, col = 'red')
summary(spam.glm)
summary(spam.lda)
summary(spam.glm)
table(spam.test$V58,spam.glm.pred>0.5)
888+64+56+526
(56+64)/1534
table(spam.test$V58, spam.pred$class)
(108+64)/1534
?confusion.matrix
library(SDMTools)
install.packages("SDMTools")
library(SDMTools)
library(SDMTools)
?confusion.matrix
#Matrices de confusion
confusion.matrix(spam.test$V58,spam.pred$class)
confusion.matrix(spam.test$V58,spam.glm.pred)
?rnorm
?mvrnorm
pb_class <- read.table("tp3_a18_clas_app.txt")
setwd("~/A18/SY19/TP03")
pb_class <- read.table("tp3_a18_clas_app.txt")
pb_class
pb_reg <- read.table("tp3_a18_reg_app.txt")
summary(pb_reg)
reg <- lm(y ~ ., data = reg.appr)
pb_class <- read.table("tp3_a18_clas_app.txt")
pb_reg <- read.table("tp3_a18_reg_app.txt")
# Training and test sets for the TP3 regression and classification problems.
# NOTE(review): earlier attempts sized the samples with a stale `n` (the spam
# row count); each sample size must use the corresponding data set's own
# number of rows.
nreg <- dim(pb_reg)[1]
reg.mask <- sort(sample(x = 1:nreg, size = trunc(nreg * 2/3)))
reg.appr <- pb_reg[reg.mask, ]
reg.test <- pb_reg[-reg.mask, ]
nclass <- dim(pb_class)[1]
class.mask <- sort(sample(x = 1:nclass, size = trunc(nclass * 2/3)))
class.appr <- pb_class[class.mask, ]
class.test <- pb_class[-class.mask, ]
summary(pb_reg)
reg <- lm(y ~ ., data = reg.appr)
summary(reg)
confint(reg)
x11()
plot(pb_reg$y,reg$fitted.values)
abline(0,1)
x11()
plot(reg.appr$y,reg$fitted.values)
abline(0,1)
# Standardized and studentized residuals against the observed response
# (first attempt had an unbalanced opening parenthesis).
plot(reg.appr$y, rstandard(reg))
plot(reg.appr$y, rstudent(reg))
shapiro.test(reg$residuals)
summary(reg)
reg2 <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41, data = reg.appr)
(summary(reg2))
x11()
plot(reg.appr$y, reg2$fitted.values)
abline(0,1)
x11()
plot(reg.appr$y,reg$fitted.values)
abline(0,1)
#avec les données de test
predict <- predict(reg,newdata = reg.test)
x11()
plot(reg.test$y,predict)
abline(0,1)
#calcul de la mse // refaire les ensembles à chaque itération?
# Mean squared prediction error of a full linear model, replicated k_max times.
#
# Args:
#   k_max:   number of rows in the returned matrix.
#   train:   data frame of training predictors (all columns enter the model).
#   y.train: training response vector (looked up from the formula environment).
#   test:    data frame of test predictors.
#   y.tst:   test response vector.
# Returns: a k_max x 1 matrix of MSE values.
#
# NOTE(review): the original loop body did not depend on the index i, so all
# k_max entries were identical; the invariant fit/prediction is hoisted out
# of the loop (same return value, computed once instead of k_max times).
# If the intent was to vary k (e.g. for k-NN), the loop index must be used.
mse <- function(k_max, train, y.train, test, y.tst) {
  reg <- lm(y.train ~ ., data = train)
  pred <- predict(reg, newdata = test)
  err <- mean((pred - y.tst)^2)
  matrix(err, nrow = k_max, ncol = 1)
}
predict
#résidus en fonction des variables explicatives
attach(pb_reg)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(pb_reg)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(reg.appr)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
x11()
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(reg.appr)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
plot(reg.appr$y,rstd)
plot(reg.appr$y,rstu)
shapiro.test(reg$residuals)
rres = reg$residuals
shapiro.test(rres)
qqnorm(rres, asp = 1)
qqline(rres, dist = qnorm)
qqnorm(rstd, asp = 1)
qqline(rstd, dist = qnorm)
qqnorm(rstu, asp = 1)
qqline(rstu, dist = qnorm)
#influence globale
plot(reg, which = 4, cook.levels = c(0, 0.1))
plot(lm.model, which = 5, cook.levels = c(0, 0.1))
plot(reg, which = 5, cook.levels = c(0, 0.1))
plot(reg2, which = 5, cook.levels = c(0, 0.1))
plot(reg2, which = 4, cook.levels = c(0, 0.1))
plot(reg2, which = 5, cook.levels = c(0, 0.1))
pred <- predict(reg,newdata = reg.test, interval = "prediction")
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
yi = pb_reg$y
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
yi = reg.test$y
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
x11()
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
pred[,2]
# Help syntax is prefix `?topic`, not postfix (`knn.reg?` is a syntax error,
# and the bare `q` just printed the quit function).
?knn.reg
#install.packages("FNN")
library(FNN)
install.packages("FNN")
library(FNN)
?knn.reg
# Split predictors and response explicitly. `y` is a column NAME, so it must
# be quoted when indexing (a bare `y` would be evaluated as a variable).
dim(reg.appr)
x.appr <- reg.appr[, 1:50]
y.appr <- reg.appr[, "y"]
x.test <- reg.test[, 1:50]
y.test <- reg.test[, "y"]
reg.knn <- knn.reg(train = x.appr, y = y.appr, k = 5, test = x.test)
reg.knn
# Test-set MSE of the k-NN regression (fixed object name: reg.knn,
# not prostate.knn from the earlier exercise).
mean((y.test - reg.knn$pred)^2)
reg.knn$pred
y.test
#stepwise selection
reg.fit<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
install.packages("leaps")
#stepwise selection
library(leaps)
reg.fit<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
plot(reg.fit,scale="r2")
x11()
plot(reg.fit,scale="r2")
reg.fit.f<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
x11()
plot(reg.fit.f,scale="r2")
reg.fit.b<-regsubsets(y ~ .,data=reg.appr,method="backward",nvmax=15)
x11()
plot(reg.fit.b,scale="r2")
reg.fit <- regsubsets(y ~., data = reg.appr, method = "exhaustive", nvmax = 15)
plot(reg.fit,scale="adjr2")
reg.fit <- regsubsets(y ~., data = reg.appr, method = "exhaustive", nvmax = 15, really.big = T)
plot(reg.fit,scale="adjr2")
x11()
plot(reg.fit,scale="bic")
x11()
plot(reg.fit.f,scale="r2")
x11()
plot(reg.fit.b,scale="r2")
class.appr
class.lda <- lda(y ~ ., data = class.appr)
??lda
library(MASS)
class.lda <- lda(y ~ ., data = class.appr)
summary(class.lda)
class.pred <- predict(class.lda, newdata = class.test)
tp03.r 0 → 100644
# Load the TP3 classification and regression data sets.
pb_class <- read.table("tp3_a18_clas_app.txt")
pb_reg <- read.table("tp3_a18_reg_app.txt")
# Training (2/3) and test (1/3) splits for each problem.
# set.seed(1)  # NOTE(review): uncomment for reproducible splits
nreg <- nrow(pb_reg)
reg.mask <- sort(sample(x = seq_len(nreg), size = trunc(nreg * 2/3)))
reg.appr <- pb_reg[reg.mask, ]
reg.test <- pb_reg[-reg.mask, ]
nclass <- nrow(pb_class)
class.mask <- sort(sample(x = seq_len(nclass), size = trunc(nclass * 2/3)))
class.appr <- pb_class[class.mask, ]
class.test <- pb_class[-class.mask, ]
# ---- Regression models ----
# Full linear regression of y on all predictors.
summary(pb_reg)
reg <- lm(y ~ ., data = reg.appr)
summary(reg)
confint(reg)
x11()
plot(reg.appr$y, reg$fitted.values)
abline(0, 1)
# Raw, standardized and studentized residuals.
rres <- reg$residuals
rstd <- rstandard(reg)
rstu <- rstudent(reg)
plot(reg.appr$y, rstd)
plot(reg.appr$y, rstu)
# Shapiro-Wilk normality test on the raw residuals.
shapiro.test(rres)
# Residuals against each explanatory variable.
# (Column lookup with [[ ]] instead of attach()/get(), which pollutes the
# search path and silently shadows variables.)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar = c(1, 1, 1, 1))
for (name in varnames) {
  plot(reg.appr[[name]], rres, xlab = name)
  plot(reg.appr[[name]], rstd, xlab = name)
  plot(reg.appr[[name]], rstu, xlab = name)
}
## Q-Q plots (argument `distribution` spelled out; `dist` relied on partial
## argument matching)
qqnorm(rres, asp = 1)
qqline(rres, distribution = qnorm)
qqnorm(rstd, asp = 1)
qqline(rstd, distribution = qnorm)
qqnorm(rstu, asp = 1)
qqline(rstu, distribution = qnorm)
# Global influence: Cook's distance (which = 4) and residuals-vs-leverage
# (which = 5).
plot(reg, which = 4, cook.levels = c(0, 0.1))
plot(reg, which = 5, cook.levels = c(0, 0.1))
# Reduced model keeping only the predictors flagged significant in the full fit.
reg2 <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41,
           data = reg.appr)
summary(reg2)
x11()
plot(reg.appr$y, reg2$fitted.values)
abline(0, 1)
# Influence diagnostics for the reduced model.
plot(reg2, which = 4, cook.levels = c(0, 0.1))
plot(reg2, which = 5, cook.levels = c(0, 0.1))
# Predictions on the test set with prediction intervals.
# BUG FIX: the original `plot(reg.test$y, predict)` plotted the stats::predict
# FUNCTION (the variable `predict` is never assigned in this script); the
# point estimates are in the first column of `pred`.
pred <- predict(reg, newdata = reg.test, interval = "prediction")
yi <- reg.test$y
x11()
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])  # lower prediction bound
lines(yi, pred[, 3])  # upper prediction bound
abline(0, 1)
# ---- Stepwise subset selection ----
library(leaps)
# Forward selection
reg.fit.f <- regsubsets(y ~ ., data = reg.appr, method = "forward", nvmax = 15)
x11()
plot(reg.fit.f, scale = "r2")
# Backward selection
reg.fit.b <- regsubsets(y ~ ., data = reg.appr, method = "backward", nvmax = 15)
x11()
plot(reg.fit.b, scale = "r2")
# Exhaustive search, scored by adjusted R^2 and BIC
# (really.big = TRUE spelled out: T/F are reassignable and unsafe).
reg.fit <- regsubsets(y ~ ., data = reg.appr, method = "exhaustive", nvmax = 15,
                      really.big = TRUE)
x11()
plot(reg.fit, scale = "adjr2")
x11()
plot(reg.fit, scale = "bic")
# ---- k-NN regression ----
library(FNN)  # knn.reg() comes from FNN; the load was missing in this script
x.appr <- reg.appr[, 1:50]
y.appr <- reg.appr[, "y"]
x.test <- reg.test[, 1:50]
y.test <- reg.test[, "y"]
reg.knn <- knn.reg(train = x.appr, y = y.appr, k = 5, test = x.test)
# Test-set mean squared error of the k-NN predictions.
mean((y.test - reg.knn$pred)^2)
#### Classifier ####
library(MASS)  # lda() comes from MASS; the load was missing in this script
# Linear discriminant analysis on the classification training set.
class.lda <- lda(y ~ ., data = class.appr)
# NOTE(review): summary() on an lda object only lists its components;
# printing class.lda directly is usually more informative.
summary(class.lda)
class.pred <- predict(class.lda, newdata = class.test)
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment