Commit 3a023ebf authored by Mathilde Rineau's avatar Mathilde Rineau 🙂
Browse files

Update Devoir2.ipynb

parent 84c5a384
......@@ -78,7 +78,29 @@
" \n",
" y = np.mean(X, axis=1)\n",
"\n",
" return X, y\n"
" return X, y\n",
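"# /!\\ THIS IS A THIRD TEST VERSION, COULD (AND WILL CERTAINLY) CHANGE\n",
"# NOTE(review): sum_X below is created with np.ndarray(...), which leaves its contents uninitialized; np.zeros(n_features) is presumably intended - to be confirmed\n",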
"\n",
"\n",
"import random\n",
"def generate_data_2(n_samples, n_features):\n",
" X = []\n",
" y = np.ndarray((n_samples,))\n",
" X.append(np.random.geometric(p = 0.5, size = n_features))\n",
" sum_X = np.ndarray((n_features,))\n",
" for i in range(n_samples):\n",
" p = random.random()\n",
" temp = np.random.geometric(p = p, size = n_features)\n",
" #print(temp)\n",
" sum_X = sum_X + temp\n",
" #print(sum_X)\n",
" X.append(sum_X)\n",
" X = np.array(X)\n",
" \n",
" \n",
" y = np.mean(X, axis=1)\n",
"\n",
" return X, y"
]
},
{
......@@ -135,6 +157,30 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Arrays to store results\n",
"n_selected = []\n",
"\n",
"# We run our regression n_tests times\n",
"for i in range(n_tests):\n",
" # Generate the data\n",
" X, y = generate_data_2(n_samples, n_features,)\n",
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = lasso.coef_ != 0\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -177,7 +223,41 @@
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"print(f'Features selected : {uniq}, count : {count}')\n",
"print(f'Number of time fist feature was ignored : {zero_removed}')"
"print(f'Number of time fist feature was ignored : {zero_removed}')\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Arrays to store results\n",
"n_selected = []\n",
"zero_removed = 0\n",
"\n",
"# We run our regression n_tests times\n",
"for i in range(n_tests):\n",
" # Generate the data\n",
" X, y = generate_data_2(n_samples, n_features)\n",
" # Fit the model (pipeline with the data)\n",
" model.fit(X, y)\n",
" # We can now retrieve selected features :\n",
" selected_features = elastic_net.coef_ != 0\n",
" n_selected.append(np.count_nonzero(selected_features))\n",
"\n",
" # Quickly show that we always remove X[0]\n",
" if not selected_features[0]:\n",
" zero_removed += 1\n",
"\n",
"\n",
"uniq, count = np.unique(n_selected, return_counts=True)\n",
"print(f'Features selected : {uniq}, count : {count}')\n",
"print(f'Number of time fist feature was ignored : {zero_removed}')\n",
"plt.bar(uniq, count, label='Number of selected features per training')\n",
"plt.show()"
]
},
{
......@@ -194,6 +274,13 @@
"\n",
"It is **as if** the elastic net « found » that each $X[i], i > 0$ was generated from $X[0]$ but did not « find » a link between the elements.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......@@ -201,7 +288,8 @@
"hash": "3abb0a1ef4892304d86bb3a3dfd052bcca35057beadba016173999c775e8d3ba"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('AOS1-QteoCFsS': pipenv)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
......@@ -213,7 +301,8 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment