plot(reg)
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
setwd("~/A18/SY19/TP02")
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
plot(reg)
plot(reg) #tous les plots
summary(reg)
reg4 <- lm(lpsa ~lcp + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
summary(reg)
reg4 <- lm(lpsa ~ age + lcp + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + gleason + pgg45, data = prostate)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + gleason, data = prostate)
summary(reg4)
summary(reg4)
#lcp rejetté
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + lcp + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
summary(reg)
reg4 <- lm(lpsa ~ age + lbph + lcp + glesaon + pgg45, data = prostate)
reg4 <- lm(lpsa ~ age + lbph + lcp + gleason + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ age + lbph + gleason + pgg45, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
reg4 <- lm(lpsa ~ . -train -lcavol, data = prostate)
summary(reg4)
x11()
plot(prostate$lpsa,reg4$fitted.values)
abline(0,1)
cor(prostate)
reg3 <- lm(lpsa ~ . - train - lcavol - lbph,data = prostate)
summary(reg3)
summary(reg)
reg3 <- lm(lpsa ~ . - train -lbph,data = prostate)
summary(reg3)
summary(reg)
plot(prostate$lcavol,reg$residuals)
plot(prostate$lweight,reg$residuals)
plot(prostate$lage,reg$residuals)
plot(prostate$age,reg$residuals)
plot(prostate$lpsa,reg$residuals)
reg5 <- lm(sqrt(lpsa) ~ .-train, data = prostate)
sqrt(4)
sqrt(prostate$lpsa)
prostate$lpsa
reg5 <- lm(sqrt(abs(lpsa)) ~ .-train, data = prostate)
summary(reg5)
plot(prostate$lpsa,reg5$fitted.values)
abline(0,1)
plot(prostate$lpsa,reg5$fitted.values)
abs(prostate$lpsa)
plot(prostate$lcavol,reg$residuals)
plot(prostate$lweight,reg$residuals)
plot(prostate$age,reg$residuals)
appr = prostate[prostate$train,1:4]
test = prostate[!prostate$train,]
appr = prostate[prostate$train,]
appr
test
appr = prostate[prostate$train,1:9]
appr
test = prostate[!prostate$train,1:9]
?predict
pred <- lm(lpsa ~ ., data = appr)
predict(pred,newdata = test)
pred$fitted.values
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
setwd("~/A18/SY19/TP02")
prostate = read.table("prostate.data")
reg <- lm(lpsa ~ . - train, data = prostate)
#estimation de tous les coefficients
#intercept = ordonnée à l'origine
summary(reg)
#pour chaque coefficient on retrouve les estimés (beta chapeau)
#Std error = sigma (espérance des beta chapeau)
#avant dernière colonne = stat de Student
#dernière colonne = P value (plus elle est petite plus on rejette H0)
#lcavol, lweight, svi rejettés
# plus il y a d'étoiles plus c'est significatif
#F-stat = tester si la régression est significative -> p-value faible ici
#(tous les prédicteurs ne sont pas significatifs)
#intervalle de confiance
confint(reg, level = 0.95)
x11()
plot(prostate$lpsa,reg$fitted.values)
abline(0,1)
x11()
plot(prostate$lpsa,reg$residuals)
plot(prostate$lpsa,rstandard(reg)) #vérifier qu'il n'y a pas un point qui a un résidu énorme
# (point aberrant)
#test de Shapiro pour la normalité
shapiro.test(reg$residuals)
#étude de la stabilité de la régression
# distance de Cook
cooks.distance(reg)
pred <- lm(lpsa ~ ., data = appr)
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
appr = prostate[prostate$train,1:9]
test = prostate[!prostate$train,1:9]
pred <- lm(lpsa ~ ., data = appr)
predict <- predict(pred,newdata = test)
plot(test$lpsa,predict)
abline(0,1)
confint(pred)
#### Classification Linéaire #
spam = read.table("spambase.dat")
spam
?sample
dim(spam)
dim(spam)[1]
sample.int(n = dim(spam)[1], prob = c(2/3, 1/3))
sample.int(n = dim(spam)[1], prob = 2/3)
sample.int(n = dim(spam)[1], prob = 0.6)
?trunk
sample(x = 1:n, size = trunc(n*2/3))
n = dim(spam)[1]
sample(x = 1:n, size = trunc(n*2/3))
appr = spam[sample,]
selection = sample(x = 1:n, size = trunc(n*2/3))
appr = spam[selection,]
appr
test = spam - appr
test = spam[!selection]
test
test = spam[!selection,]
test
?sort
selection = sort(sample(x = 1:n, size = trunc(n*2/3)))
appr = spam[selection,]
appr
test = spam[(1:n) - selection,]
test = spam[(1:n)[!selection],]
test
test = spam[(1:n) - (1:n)[selection],]
?sample
test = spam[!appr]
test
test = spam[!appr,]
test
?lda
??lda
package(MASS)
install.packages("MASS")
library(MASS)
?lda
test = spam[-appr,]
test = spam[-selection,]
test
spam = read.table("spambase.dat")
n = dim(spam)[1]
selection = sort(sample(x = 1:n, size = trunc(n*2/3)))
spam.appr = spam[selection,]
spam.test = spam[-selection,]
spam.lda(v58~., data = appr)
spam.lda <- lda(v58~., data = appr)
spam.lda <- lda(v58~., data = spam.appr)
spam.appr = spam[selection,]
spam.test = spam[-selection,]
spam.lda <- lda(v58~., data = spam.appr)
spam.appr
spam.lda <- lda(V58~., data = spam.appr)
spam.lda
spam.pred <- predict(spam.lda, newdata = spam.test)
spam.pred
table(spam.test$V58, spam.pred$V58)
table(spam.test$V58, spam.pred$class)
spam.pred$class == spam.test$V58
library(pROC)
install.packages("pROC")
library(pROC)
library(pROC)
spam.pred$x
roc_curve <- roc(spam.test$V58, as.vector(spam.pred$x))
plot(roc_curve)
View(spam.pred)
roc_curve <- roc(spam.test$V58, spam.pred$class)
spam.glm <- glm(V58~.data = spam.appr, family = binomial)
spam.glm <- glm(V58~.,data = spam.appr, family = binomial)
summary(spam.glm)
spam.glm.pred <- predict(spam.glm, newdata = spam.test, type ="response")
table(spam.test$V58,spam.glm.pred>0.5)
table(spam.test$V58, spam.pred$class)
logit <- predict(spam.glm, newdata = spam.test, type = "link")
plot(roc_glm, add = TRUE, col = 'red')
roc_glm = roc(spam.test$V58,logit)
plot(roc_glm, add = TRUE, col = 'red')
summary(spam.glm)
summary(spam.lda)
summary(spam.glm)
table(spam.test$V58,spam.glm.pred>0.5)
888+64+56+526
(56+64)/1534
table(spam.test$V58, spam.pred$class)
(108+64)/1534
?confusion.matrix
library(SDMTools)
install.packages("SDMTools")
library(SDMTools)
library(SDMTools)
?confusion.matrix
#Matrices de confusion
confusion.matrix(spam.test$V58,spam.pred$class)
confusion.matrix(spam.test$V58,spam.glm.pred)
?rnorm
?mvrnorm
pb_class <- read.table("tp3_a18_clas_app.txt")
setwd("~/A18/SY19/TP03")
pb_class <- read.table("tp3_a18_clas_app.txt")
pb_class
pb_reg <- read.table("tp3_a18_reg_app.txt")
summary(pb_reg)
reg <- lm(y ~ ., data = reg.appr)
pb_class <- read.table("tp3_a18_clas_app.txt")
pb_reg <- read.table("tp3_a18_reg_app.txt")
#ensembles d'apprentissage et de test
n.reg = dim(pb_reg)[1]
reg.mask = sort(sample(x = 1:n, size = trunc(n*2/3)))
reg.appr = pb_reg[reg.mask,]
reg.test = pb_reg[-reg.mask,]
n.class = dim(pb_class)[1]
class.mask = sort(sample(x = 1:n, size = trunc(n*2/3)))
class.appr = pb_class[class.mask,]
class.test = pb_class[-class.mask,]
n.reg = dim(pb_reg)[1]
reg.mask = sort(sample(x = 1:n.reg, size = trunc(n*2/3)))
reg.appr = pb_reg[reg.mask,]
reg.test = pb_reg[-reg.mask,]
n.class = dim(pb_class)[1]
class.mask = sort(sample(x = 1:n.class, size = trunc(n*2/3)))
class.appr = pb_class[class.mask,]
class.test = pb_class[-class.mask,]
#ensembles d'apprentissage et de test
n.reg = dim(pb_reg)[1]
reg.mask = sort(sample(x = 1:n.reg, size = trunc(n*2/3)))
#ensembles d'apprentissage et de test
nreg = dim(pb_reg)[1]
reg.mask = sort(sample(x = 1:nreg, size = trunc(n*2/3)))
reg.mask = sort(sample(x = 1:nreg, size = trunc(nreg*2/3)))
reg.appr = pb_reg[reg.mask,]
reg.test = pb_reg[-reg.mask,]
nclass = dim(pb_class)[1]
class.mask = sort(sample(x = 1:nclass, size = trunc(nclass*2/3)))
class.appr = pb_class[class.mask,]
class.test = pb_class[-class.mask,]
summary(pb_reg)
reg <- lm(y ~ ., data = reg.appr)
summary(reg)
confint(reg)
x11()
plot(pb_reg$y,reg$fitted.values)
abline(0,1)
x11()
plot(reg.appr$y,reg$fitted.values)
abline(0,1)
plot((reg.appr$y,rstandard(reg))
plot(reg.appr$y,rstandard(reg))
plot(reg.appr$y,rstudent(reg))
shapiro.test(reg$residuals)
summary(reg)
reg2 <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41, data = reg.appr)
(summary(reg2))
x11()
plot(reg.appr$y, reg2$fitted.values)
abline(0,1)
x11()
plot(reg.appr$y,reg$fitted.values)
abline(0,1)
#avec les données de test
predict <- predict(reg,newdata = reg.test)
x11()
plot(reg.test$y,predict)
abline(0,1)
#calcul de la mse // refaire les ensembles à chaque itération?
mse <- function(k_max,train,y.train, test, y.tst){
mse <- matrix(0,k_max,1)
for (i in 1:k_max){
reg <- lm(y.train ~ ., data = train)
pred <- predict(reg,newdata = test)
err <- (pred - y.tst)**2
mse[i] <- mean(err)
}
mse
}
predict
#résidus en fonction des variables explicatives
attach(pb_reg)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(pb_reg)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(reg.appr)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
x11()
rres = reg$fitted.values
rstd = rstandard(reg)
rstu = rstudent(reg)
attach(reg.appr)
varnames <- attr(reg$terms, "term.labels")
par(mfrow = c(length(varnames), 3))
par(mar=c(1,1,1,1))
for(name in varnames) {
plot(get(name), rres, xlab = name)
plot(get(name), rstd, xlab = name)
plot(get(name), rstu, xlab = name)
}
plot(reg.appr$y,rstd)
plot(reg.appr$y,rstu)
shapiro.test(reg$residuals)
rres = reg$residuals
shapiro.test(rres)
qqnorm(rres, asp = 1)
qqline(rres, dist = qnorm)
qqnorm(rstd, asp = 1)
qqline(rstd, dist = qnorm)
qqnorm(rstu, asp = 1)
qqline(rstu, dist = qnorm)
#influence globale
plot(reg, which = 4, cook.levels = c(0, 0.1))
plot(lm.model, which = 5, cook.levels = c(0, 0.1))
plot(reg, which = 5, cook.levels = c(0, 0.1))
plot(reg2, which = 5, cook.levels = c(0, 0.1))
plot(reg2, which = 4, cook.levels = c(0, 0.1))
plot(reg2, which = 5, cook.levels = c(0, 0.1))
pred <- predict(reg,newdata = reg.test, interval = "prediction")
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
yi = pb_reg$y
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
yi = reg.test$y
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
x11()
plot(yi, pred[, 1], ylim = range(pred), asp = 1)
lines(yi, pred[, 2])
lines(yi, pred[, 3])
abline(0, 1)
pred[,2]
knn.reg?
q
?knn.reg
#install.packages("FNN")
library(FNN)
install.packages("FNN")
library(FNN)
?knn.reg
x.appr = reg.appr[,-y]
y.appr = reg.appr[,y]
dim(reg.appr)
x.appr = reg.appr[,1:50]
y.appr = reg.appr[,y]
x.appr = reg.appr[,1:50]
y.appr = reg.appr[,'y']
x.appr
reg.appr
x.test = reg.test[,1:50]
y.test = reg.test[,'y']
reg.knn = knn.reg(train = x.appr, y = y.appr, k = 5, test = x.test)
reg.knn
mean((y.test-prostate.knn$pred)^2)
mean((y.test-reg.knn$pred)^2)
reg.knn$pred
y.test
#stepwise selection
reg.fit<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
install.packages("leaps")
#stepwise selection
library(leaps)
reg.fit<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
plot(reg.fit,scale="r2")
x11()
plot(reg.fit,scale="r2")
reg.fit.f<-regsubsets(y ~ .,data=reg.appr,method="forward",nvmax=15)
x11()
plot(reg.fit.f,scale="r2")
reg.fit.b<-regsubsets(y ~ .,data=reg.appr,method="backward",nvmax=15)
x11()
plot(reg.fit.b,scale="r2")
reg.fit <- regsubsets(y ~., data = reg.appr, method = "exhaustive", nvmax = 15)
plot(reg.fit,scale="adjr2")
reg.fit <- regsubsets(y ~., data = reg.appr, method = "exhaustive", nvmax = 15, really.big = T)
plot(reg.fit,scale="adjr2")
x11()
plot(reg.fit,scale="bic")
x11()
plot(reg.fit.f,scale="r2")
x11()
plot(reg.fit.b,scale="r2")
class.appr
class.lda <- lda(y ~ ., data = class.appr)
??lda
library(MASS)
class.lda <- lda(y ~ ., data = class.appr)
summary(class.lda)
class.pred <- predict(class.lda, newdata = class.test)