Commit b674df38 authored by Rémy Huet's avatar Rémy Huet 💻
Browse files

qqs ajouts, rien de bien satisfaisant

parent 3a023ebf
......@@ -126,7 +126,7 @@
"outputs": [],
"source": [
"# Params for data generation:\n",
"n_samples = 500\n",
"n_samples = 300\n",
"n_features = 50\n",
"m = 30\n",
"s = 3\n",
......@@ -135,12 +135,13 @@
"n_tests = 100\n",
"\n",
"standard_scaler = StandardScaler(with_mean=False)\n",
"lasso = Lasso(alpha=1.0, fit_intercept=True, max_iter=5000)\n",
"lasso = Lasso(alpha=3.0, fit_intercept=True, max_iter=5000)\n",
"\n",
"model = make_pipeline(standard_scaler, lasso)\n",
"\n",
"# Arrays to store results\n",
"n_selected = []\n",
"selection_count = np.zeros((n_features))\n",
"\n",
"# We test our regression n_test time\n",
"for i in range(n_tests):\n",
......@@ -149,11 +150,20 @@
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = lasso.coef_ != 0\n",
" selected_features = (lasso.coef_ != 0) * 1 # (lasso.coef_ != 0) gives an array of True / False; * 1 transforms True into 1 and False into 0\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
" selection_count += selected_features\n",
"\n",
"# Using n_selected, we can display the number of selected features per training\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.legend()\n",
"plt.show()\n",
"\n",
"# Using selection_count, we can display the number of times each feature was selected\n",
"plt.bar(range(n_features), selection_count, label='Number of times each feature was selected')\n",
"plt.legend()\n",
"plt.show()"
]
},
......@@ -165,6 +175,7 @@
"source": [
"# Arrays to store results\n",
"n_selected = []\n",
"selection_count = np.zeros((n_features))\n",
"\n",
"# We test our regression n_test time\n",
"for i in range(n_tests):\n",
......@@ -173,11 +184,16 @@
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = lasso.coef_ != 0\n",
" selected_features = (lasso.coef_ != 0) * 1 # (lasso.coef_ != 0) gives an array of True / False; * 1 transforms True into 1 and False into 0\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
" selection_count += selected_features\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.show()\n",
"# Using selection_count, we can display the number of times each feature was selected\n",
"plt.bar(range(n_features), selection_count, label='Number of times each feature was selected')\n",
"plt.legend()\n",
"plt.show()"
]
},
......@@ -198,13 +214,13 @@
"source": [
"# We use the same alpha as the lasso regression\n",
"# Assume we really want to select features, we give the priority to l1\n",
"elastic_net = ElasticNet(alpha=1.0, l1_ratio=0.8, fit_intercept=True, max_iter=10000)\n",
"elastic_net = ElasticNet(alpha=3.0, l1_ratio=0.9, fit_intercept=True, max_iter=10000)\n",
"\n",
"model = make_pipeline(standard_scaler, elastic_net)\n",
"\n",
"# Arrays to store results\n",
"n_selected = []\n",
"zero_removed = 0\n",
"selection_count = np.zeros((n_features))\n",
"\n",
"# We test our regression n_test time\n",
"for i in range(n_tests):\n",
......@@ -213,18 +229,16 @@
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = elastic_net.coef_ != 0\n",
" selected_features = (elastic_net.coef_ != 0) * 1\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
"\n",
" # Fastly show that we always remove X[0]\n",
" if not selected_features[0]:\n",
" zero_removed += 1\n",
" selection_count += selected_features\n",
"\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"print(f'Features selected : {uniq}, count : {count}')\n",
"print(f'Number of times the first feature was ignored : {zero_removed}')\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"\n",
"plt.bar(range(n_features), selection_count, label='Number of times each feature was selected')\n",
"plt.legend()\n",
"plt.show()"
]
},
......@@ -236,7 +250,7 @@
"source": [
"# Arrays to store results\n",
"n_selected = []\n",
"zero_removed = 0\n",
"selection_count = np.zeros((n_features))\n",
"\n",
"# We test our regression n_test time\n",
"for i in range(n_tests):\n",
......@@ -245,18 +259,18 @@
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = elastic_net.coef_ != 0\n",
" selected_features = (elastic_net.coef_ != 0) * 1\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
"\n",
" # Fastly show that we always remove X[0]\n",
" if not selected_features[0]:\n",
" zero_removed += 1\n",
" selection_count += selected_features\n",
"\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"print(f'Features selected : {uniq}, count : {count}')\n",
"print(f'Number of times the first feature was ignored : {zero_removed}')\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.legend()\n",
"plt.show()\n",
"\n",
"plt.bar(range(n_features), selection_count, label='Number of times each feature was selected')\n",
"#plt.legend()\n",
"plt.show()"
]
},
......@@ -274,13 +288,6 @@
"\n",
"It is **as if** the elastic net « found » that each $X[i], i > 0$ was generated from $X[0]$, but it did not « find » a link between the elements.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......@@ -301,8 +308,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment