library(MASS) library(leaps) library(corrplot) library(glmnet) #chargement des donn�es reg.data <- read.table("tp3_a18_reg_app.txt") x.reg <- reg.data[,1:50] y.reg <- reg.data[,"y"] n = dim(reg.data)[1] p = dim(reg.data)[2] models.list <- c("subset13", "subset18", "subset.stepwise15", "subset.stepwise12", "subset.stepwise20", "subset.stepwise22", "subset.stepwise18", "ridge13", "ridge18", "ridge.stepwise15", "ridge.stepwise12", "ridge.stepwise20", "ridge.stepwise22", "ridge.stepwise18", "lasso13", "lasso18", "lasso.stepwise15", "lasso.stepwise12", "lasso.stepwise20", "lasso.stepwise22", "lasso.stepwise18") # Liste des modeles models.error <- c() trials <- 10 # Nombre d'essais de cross-validation par mod�le n_folds <- 5 # Nombre de blocs pour la cross-validation # Test pour chaque mod�le for(model in models.list) # Pour chaque modele { models.error[model] <- 0 for(i in c(1:trials)) # On fait plusieurs essais { folds <- sample(rep(1:n_folds, length.out=n)) for(k in 1:n_folds) # Sur chaque segment de la cross-validation { reg.train <- reg.data[folds!=k,] reg.train.X <- reg.train[,-p] reg.train.y <- reg.train$y reg.test <- reg.data[folds==k,] reg.test.X <- reg.test[,-p] reg.test.y <- reg.test$y # Apprentissage du modele if(model == "subset13") { reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, data = reg.train) } else if(model == "subset18") { reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, data = reg.train) } else if(model == "subset.stepwise15") { reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, data = reg.train) } else if(model == "subset.stepwise12") { reg.model <- lm(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49 , data = reg.train) } else if(model == "subset.stepwise20") { reg.model <- lm(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, data = reg.train) } else if(model == "subset.stepwise22") { reg.model <- lm(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, data = reg.train) } else if(model == "subset.stepwise18") { reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, data = reg.train) } else if(model == "ridge13") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X28 + X31 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge18") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge.stepwise15") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge.stepwise12") { x <- model.matrix(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge.stepwise20") { x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge.stepwise22") { x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "ridge.stepwise18") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0) } else if(model == "lasso13") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso18") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso.stepwise15") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso.stepwise12") { x <- model.matrix(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso.stepwise20") { x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso.stepwise22") { x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else if(model == "lasso.stepwise18") { x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, reg.data) xapp <- x[folds!=k,] xtst <- x[folds == k,] cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1) reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1) } else { clus.model <- NULL } if(!is.null(reg.model)) { if(model == "subset13" || model == "subset18" || model == "subset.stepwise15" || model == "subset.stepwise12" || model == "subset.stepwise20" || model == "subset.stepwise22" || model == "subset.stepwise18") { reg.predict <- predict(reg.model, newdata = reg.test) models.error[model] <- models.error[model] + mean((reg.test.y - reg.predict)^2) } else { reg.predict <- predict(reg.model,s=cv.out$lambda.min,newx=xtst) models.error[model] <- models.error[model] + mean((reg.test.y - reg.predict)^2) } } } } models.error[model] <- models.error[model]/(n_folds*trials) } print(models.error)