Commit 9074a236 authored by Rémy Huet's avatar Rémy Huet 💻
Browse files

TP5

parent 8373a5a5
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AOS 1 - TP5\n",
"## Regularization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge, LinearRegression, Lasso\n",
"from scipy.linalg import svd\n",
"import numpy as np\n",
"\n",
"from __future__ import print_function\n",
"from ipywidgets import interact, interactive, fixed, interact_manual\n",
"import ipywidgets as widgets\n",
"\n",
"# Modelization\n",
"from utils import Event\n",
"\n",
"n_features = 50\n",
"evt = Event(n_features=n_features, effective_rank=3, noise_level=1)\n",
"\n",
"# Generate samples\n",
"X, y = evt.sample(n_samples=100)\n",
"\n",
"print(X.shape)\n",
"print(y.shape)\n",
"\n",
"print(evt.coefficients)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Question 1\n",
"\n",
"Using a SVD, compute the singular values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"@interact\n",
"def compute_svd(eff_rank = widgets.IntSlider(min=1, max=50)):\n",
" evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)\n",
" X, y = evt.sample(n_samples=100)\n",
" U, S, V = svd(X)\n",
" plt.bar(range(len(S)), S)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see thar the effective rank effects the number of significant singular values : measures the dependance on features.\n",
"\n",
"### Question 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_risk(model, dataX, datay, n_times, X0, evt):\n",
" # Expected value at X0 :\n",
" Y0 = X0 @ evt.coefficients\n",
" preds = []\n",
" # Repeat n_times the predictions & models\n",
" for i in range(n_times):\n",
" # Generate data\n",
" X = dataX[i]\n",
" y = datay[i]\n",
" # Train the model on the sample:\n",
" model.fit(X, y)\n",
" # Predict value at X0 aand add it to preds\n",
" preds.append(model.predict(X0))\n",
"\n",
" # Use preds to estimate bias, variance and risk of the estimator at X0\n",
" print(f'Expected Y0 : {Y0}')\n",
" print(f'Mean of predicted Ys : {np.mean(preds)}')\n",
"\n",
" bias = Y0 - np.mean(preds)\n",
" print(f'Bias : {bias}')\n",
" var = np.var(preds)\n",
" print(f'Var : {var}')\n",
" \n",
"\n",
"def generate_sample(n_times, n_samples, eff_rank):\n",
" # X0 is randomly generated\n",
" X0 = np.random.randn(1, n_features)\n",
"\n",
" # Generate event object\n",
" evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)\n",
" dataX = []\n",
" datay = []\n",
" for i in range(n_times):\n",
" X, y = evt.sample(n_samples)\n",
" dataX.append(X)\n",
" datay.append(y)\n",
" print(f'Mean y = {np.mean(datay)}')\n",
" return n_times, X0, dataX, datay, evt\n",
"\n",
"print(\"Data generation : \")\n",
"\n",
"w = interactive(generate_sample,\n",
" n_times = widgets.IntSlider(min=1, max=1000, value=500),\n",
" n_samples = widgets.IntSlider(min=1, max=200, value=50),\n",
" eff_rank = widgets.IntSlider(min=1, max=50, value=3),\n",
")\n",
"\n",
"display(w)\n",
"n_times, X0, dataX, datay, evt = w.result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def qu2(model, alpha, dataX, datay, n_times, X0, evt):\n",
" if model == 'linear':\n",
" model = LinearRegression()\n",
" print('Using linear model')\n",
" elif model == 'ridge':\n",
" print(f'Using ridge model with alpha = {alpha}')\n",
" model = Ridge(alpha=alpha) # TODO proposer modification\n",
" elif model == 'lasso':\n",
" print(f'Using lasso model with alpha = {alpha}')\n",
" model = Lasso(alpha=alpha)\n",
" else:\n",
" raise 'Error : no model defined'\n",
"\n",
" compute_risk(model, dataX, datay, n_times, X0, evt)\n",
" \n",
"\n",
"interact(qu2,\n",
" model = widgets.Dropdown(\n",
" options = ['linear', 'ridge', 'lasso'],\n",
" value='linear',\n",
" description='model'\n",
" ),\n",
" alpha = widgets.FloatSlider(min=0.01, max=5, step=0.01),\n",
" dataX = widgets.fixed(dataX),\n",
" datay = widgets.fixed(datay),\n",
" n_times = widgets.fixed(n_times),\n",
" X0 = widgets.fixed(X0),\n",
" evt = widgets.fixed(evt)\n",
")"
]
}
],
"metadata": {
"interpreter": {
"hash": "3abb0a1ef4892304d86bb3a3dfd052bcca35057beadba016173999c775e8d3ba"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('AOS1-QteoCFsS': pipenv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
from scipy import linalg
import numpy as np
class Event:
    """Generator of synthetic linear-regression data with a chosen singular profile.

    Design matrices are built as ``U @ S @ V.T`` where ``V`` is a fixed random
    orthogonal matrix, ``S`` is a fixed diagonal matrix of singular values and
    ``U`` is redrawn with orthonormal columns on every call to :meth:`sample`.
    Targets are ``X @ coefficients + bias`` plus optional Gaussian noise.

    Parameters:
        n_features: number of columns of the design matrix.
        coefficients: true regression coefficients; drawn as
            ``10 * randn(n_features)`` when None.
        tail_strength: weight of the slowly decaying tail of the singular
            profile; ``1 - tail_strength`` weights the fast-decaying part.
        effective_rank: scale of the decay of the singular values. When None,
            ``tail_strength`` is forced to 1 and the profile becomes
            ``exp(-i / n_features)``.
        bias: constant added to every target.
        noise_level: when not None, Gaussian noise with standard deviation
            ``noise_level / 100 * ||coefficients||`` is added to the targets.
    """

    def __init__(self, n_features=10, coefficients=None, tail_strength=0.1,
                 effective_rank=None, bias=0.0, noise_level=None):
        self.n_features = n_features
        self.noise_level = noise_level
        self.effective_rank = effective_rank
        # Stored so that sample() can apply it (it used to be silently dropped)
        self.bias = bias

        if coefficients is None:
            self.coefficients = 10 * np.random.randn(n_features)
        else:
            self.coefficients = coefficients

        # Fixed right singular vectors: Q factor of a Gaussian matrix
        v, _ = linalg.qr(np.random.randn(n_features, n_features), mode='economic')
        self._v = v

        # Index of the singular values
        singular_ind = np.arange(n_features, dtype=np.float64)
        if self.effective_rank is None:
            # No effective rank requested: keep only the tail component
            tail_strength = 1
            self.effective_rank = n_features
            singular_ind = 10 * singular_ind

        # Build the singular profile by assembling signal and noise components
        low_rank = ((1 - tail_strength) *
                    np.exp(-1.0 * (singular_ind / self.effective_rank) ** 2))
        tail = tail_strength * np.exp(-0.1 * singular_ind / self.effective_rank)
        # Broadcasting against the identity puts the profile on the diagonal
        self._s = np.identity(n_features) * (low_rank + tail)

    def sample(self, n_samples=100):
        """Draw a dataset of `n_samples` rows.

        Returns:
            (X, y) where X has shape (n_samples, n_features) and
            y = X @ coefficients + bias, plus noise when `noise_level` is set.
        """
        # At least n_features rows are needed for the economic QR to return
        # n_features orthonormal columns; extra rows are cut off afterwards
        u0 = np.random.randn(max(n_samples, self.n_features), self.n_features)
        u, _ = linalg.qr(u0, mode='economic')
        X = np.dot(np.dot(u, self._s), self._v.T)
        X = X[:n_samples, :]

        y = np.dot(X, self.coefficients) + self.bias
        if self.noise_level is not None:
            coeffs_norm = linalg.norm(self.coefficients)
            y += self.noise_level / 100 * coeffs_norm * np.random.randn(len(y))
        return X, y
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment