Skip to content
Snippets Groups Projects
cv_regression.r 8.86 KiB
Newer Older
library(MASS)
library(leaps)
library(corrplot)
library(glmnet)

#chargement des donnes
reg.data <- read.table("tp3_a18_reg_app.txt")

x.reg <- reg.data[,1:50]
y.reg <- reg.data[,"y"]

n = dim(reg.data)[1]
p = dim(reg.data)[2]


models.list <- c("subset13", "subset18", "subset.stepwise15", "subset.stepwise12", "subset.stepwise20", "subset.stepwise22", "subset.stepwise18", "ridge13", "ridge18", "ridge.stepwise15", "ridge.stepwise12", "ridge.stepwise20", "ridge.stepwise22", "ridge.stepwise18", "lasso13", "lasso18", "lasso.stepwise15", "lasso.stepwise12", "lasso.stepwise20", "lasso.stepwise22", "lasso.stepwise18") # Liste des modeles
models.error <- c()
trials <- 10 # Nombre d'essais de cross-validation par modle
n_folds <- 5 # Nombre de blocs pour la cross-validation

# Test pour chaque modle

for(model in models.list) # Pour chaque modele
{
  models.error[model] <- 0
  for(i in c(1:trials)) # On fait plusieurs essais
  {
    folds <- sample(rep(1:n_folds, length.out=n))
    for(k in 1:n_folds) # Sur chaque segment de la cross-validation
    {
      reg.train <- reg.data[folds!=k,]
      reg.train.X <- reg.train[,-p]
      reg.train.y <- reg.train$y
      reg.test <- reg.data[folds==k,]
      reg.test.X <- reg.test[,-p]
      reg.test.y <- reg.test$y
      
      # Apprentissage du modele
      if(model == "subset13")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, data = reg.train)
      }
      else if(model == "subset18")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, data = reg.train)
      }
      else if(model == "subset.stepwise15")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, data = reg.train)
      }
      else if(model == "subset.stepwise12")
      {
        reg.model <- lm(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49 , data = reg.train)
      }
      else if(model == "subset.stepwise20")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, data = reg.train)
      }
      else if(model == "subset.stepwise22")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, data = reg.train)
      }
      else if(model == "subset.stepwise18")
      {
        reg.model <- lm(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, data = reg.train)
      }
      else if(model == "ridge13")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X28 + X31 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge18")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge.stepwise15")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge.stepwise12")
      {
        x <- model.matrix(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
      
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
      
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge.stepwise20")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge.stepwise22")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "ridge.stepwise18")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=0)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=0)
      }
      else if(model == "lasso13")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso18")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X40 + X43 + X46, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso.stepwise15")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X41 + X24 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso.stepwise12")
      {
        x <- model.matrix(y ~ X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 + X1 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
      
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
      
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso.stepwise20")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso.stepwise22")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X10 + X14 + X16 + X17 + X19 + X24 + X32 + X34 + X35 + X37 + X38 + X39 + X40 + X41 + X43 + X46 + X49, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else if(model == "lasso.stepwise18")
      {
        x <- model.matrix(y ~ X1 + X2 + X3 + X14 + X19 + X32 + X34 + X35 + X37 + X38 + X39 +X41 + X49 + X10 + X24 + X17 + X43 + X46, reg.data)
        xapp <- x[folds!=k,]
        xtst <- x[folds == k,]
        
        cv.out<-cv.glmnet(xapp,reg.train.y,alpha=1)
        
        reg.model<-glmnet(xapp,reg.train.y,lambda=cv.out$lambda.min,alpha=1)
      }
      else
      {
        clus.model <- NULL
      }
      
      if(!is.null(reg.model))
      {
        if(model == "subset13" || model == "subset18" || model == "subset.stepwise15" || model == "subset.stepwise12" || model == "subset.stepwise20" || model == "subset.stepwise22" || model == "subset.stepwise18")
        {
          reg.predict <- predict(reg.model, newdata = reg.test)
          models.error[model] <- models.error[model] + mean((reg.test.y - reg.predict)^2)
        }
        else
        {
          reg.predict <- predict(reg.model,s=cv.out$lambda.min,newx=xtst)
          models.error[model] <- models.error[model] + mean((reg.test.y - reg.predict)^2)
          
        }
      }
    }
  }
  models.error[model] <- models.error[model]/(n_folds*trials)
}

print(models.error)