Commit 9074a236 authored by Rémy Huet

TP5

parent 8373a5a5
%% Cell type:markdown id: tags:
# AOS 1 - TP5
## Regularization
%% Cell type:code id: tags:
```
from __future__ import print_function  # must come before any other statement in the cell
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from scipy.linalg import svd
import numpy as np
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Modeling
from utils import Event

n_features = 50
evt = Event(n_features=n_features, effective_rank=3, noise_level=1)

# Generate samples
X, y = evt.sample(n_samples=100)
print(X.shape)
print(y.shape)
print(evt.coefficients)
```
%% Cell type:markdown id: tags:
### Question 1
Using an SVD, compute the singular values of X.
%% Cell type:code id: tags:
```
import matplotlib.pyplot as plt

@interact
def compute_svd(eff_rank=widgets.IntSlider(min=1, max=50)):
    evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)
    X, y = evt.sample(n_samples=100)
    U, S, V = svd(X)
    plt.bar(range(len(S)), S)
```
%% Cell type:markdown id: tags:
We can see that the effective rank affects the number of significant singular values: it measures the linear dependence between features.
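To make this concrete, here is a minimal non-interactive sketch counting how many singular values remain significant as the effective rank grows; the 1% threshold is an arbitrary illustrative choice:
%% Cell type:code id: tags:
```
# Count "significant" singular values for several effective ranks.
# The 1% threshold below is an arbitrary illustrative choice.
for eff_rank in (3, 10, 30):
    evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)
    X, _ = evt.sample(n_samples=100)
    S = svd(X, compute_uv=False)
    print(f'effective_rank={eff_rank}: {np.sum(S > 0.01 * S[0])} significant singular values')
```
%% Cell type:markdown id: tags: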
### Question 2
%% Cell type:code id: tags:
```
def compute_risk(model, dataX, datay, n_times, X0, evt):
    # Expected value at X0 (noiseless)
    Y0 = X0 @ evt.coefficients
    preds = []
    # Repeat the fit/predict cycle n_times, once per generated dataset
    for i in range(n_times):
        # Retrieve the i-th generated dataset
        X = dataX[i]
        y = datay[i]
        # Train the model on the sample
        model.fit(X, y)
        # Predict the value at X0 and add it to preds
        preds.append(model.predict(X0))
    # Use preds to estimate bias, variance and risk of the estimator at X0
    print(f'Expected Y0 : {Y0}')
    print(f'Mean of predicted Ys : {np.mean(preds)}')
    bias = Y0 - np.mean(preds)
    print(f'Bias : {bias}')
    var = np.var(preds)
    print(f'Var : {var}')
    # Risk at X0 (excluding irreducible noise) = squared bias + variance
    risk = bias ** 2 + var
    print(f'Risk : {risk}')


def generate_sample(n_times, n_samples, eff_rank):
    # X0 is randomly generated
    X0 = np.random.randn(1, n_features)
    # Generate the event object
    evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)
    dataX = []
    datay = []
    for i in range(n_times):
        X, y = evt.sample(n_samples)
        dataX.append(X)
        datay.append(y)
    print(f'Mean y = {np.mean(datay)}')
    return n_times, X0, dataX, datay, evt


print("Data generation:")
w = interactive(generate_sample,
                n_times=widgets.IntSlider(min=1, max=1000, value=500),
                n_samples=widgets.IntSlider(min=1, max=200, value=50),
                eff_rank=widgets.IntSlider(min=1, max=50, value=3))
display(w)
n_times, X0, dataX, datay, evt = w.result
```
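%% Cell type:markdown id: tags:
If the interactive widget is not available (e.g. in a static rendering), the same computation can be run directly. A minimal sketch; the parameter values below are arbitrary:
%% Cell type:code id: tags:
```
# Generate the datasets without widgets and estimate the risk of
# ordinary least squares at X0 (arbitrary illustrative parameters).
n_times, X0, dataX, datay, evt = generate_sample(n_times=200, n_samples=50, eff_rank=3)
compute_risk(LinearRegression(), dataX, datay, n_times, X0, evt)
```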
%% Cell type:code id: tags:
```
def qu2(model, alpha, dataX, datay, n_times, X0, evt):
    if model == 'linear':
        model = LinearRegression()
        print('Using linear model')
    elif model == 'ridge':
        print(f'Using ridge model with alpha = {alpha}')
        model = Ridge(alpha=alpha)  # TODO: propose a modification
    elif model == 'lasso':
        print(f'Using lasso model with alpha = {alpha}')
        model = Lasso(alpha=alpha)
    else:
        raise ValueError('No model defined')
    compute_risk(model, dataX, datay, n_times, X0, evt)


interact(qu2,
         model=widgets.Dropdown(
             options=['linear', 'ridge', 'lasso'],
             value='linear',
             description='model'),
         alpha=widgets.FloatSlider(min=0.01, max=5, step=0.01),
         dataX=widgets.fixed(dataX),
         datay=widgets.fixed(datay),
         n_times=widgets.fixed(n_times),
         X0=widgets.fixed(X0),
         evt=widgets.fixed(evt))
```
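%% Cell type:markdown id: tags:
`compute_risk` prints its estimates, so to visualise how the ridge penalty trades bias for variance we need a variant that returns them. A minimal sketch, assuming the data-generation widget above has been run; `est_bias_var` and the alpha grid are our own additions, not part of the original TP:
%% Cell type:code id: tags:
```
def est_bias_var(model, dataX, datay, n_times, X0, evt):
    # Same Monte Carlo estimate as compute_risk, but returning the values
    Y0 = X0 @ evt.coefficients
    preds = [model.fit(dataX[i], datay[i]).predict(X0) for i in range(n_times)]
    return float(Y0 - np.mean(preds)), float(np.var(preds))


alphas = np.logspace(-2, 1, 20)  # arbitrary grid of penalty strengths
risks = []
for a in alphas:
    bias, var = est_bias_var(Ridge(alpha=a), dataX, datay, n_times, X0, evt)
    risks.append(bias ** 2 + var)
plt.semilogx(alphas, risks)
plt.xlabel('alpha')
plt.ylabel('estimated risk at X0')
```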
utils.py:
```
from scipy import linalg
import numpy as np


class Event:
    def __init__(self, n_features=10, coefficients=None, tail_strength=0.1,
                 effective_rank=None, bias=0.0, noise_level=None):
        self.n_features = n_features
        self.noise_level = noise_level
        self.effective_rank = effective_rank
        if coefficients is None:
            self.coefficients = 10 * np.random.randn(n_features)
        else:
            self.coefficients = coefficients
        v, _ = linalg.qr(np.random.randn(n_features, n_features), mode='economic')
        self._v = v
        # Index of the singular values
        singular_ind = np.arange(n_features, dtype=np.float64)
        if self.effective_rank is None:
            tail_strength = 1
            self.effective_rank = n_features
            singular_ind = 10 * singular_ind
        # Build the singular profile by assembling signal and noise components
        low_rank = ((1 - tail_strength) *
                    np.exp(-1.0 * (singular_ind / self.effective_rank) ** 2))
        tail = tail_strength * np.exp(-0.1 * singular_ind / self.effective_rank)
        self._s = np.identity(n_features) * (low_rank + tail)

    def sample(self, n_samples=100):
        u0 = np.random.randn(max(n_samples, self.n_features), self.n_features)
        u, _ = linalg.qr(u0, mode='economic')
        X = np.dot(np.dot(u, self._s), self._v.T)
        X = X[:n_samples, :]
        y = np.dot(X, self.coefficients)
        if self.noise_level is not None:
            coeffs_norm = linalg.norm(self.coefficients)
            y += self.noise_level / 100 * coeffs_norm * np.random.randn(len(y))
        return X, y
```
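As a quick sanity check of `Event` (a minimal sketch under the constructor defaults): with `noise_level=None`, `y` is an exact linear function of `X`.
```
import numpy as np
from utils import Event

evt = Event(n_features=10, effective_rank=3, noise_level=None)
X, y = evt.sample(n_samples=20)
# With noise_level=None, no noise is added, so y == X @ coefficients
assert np.allclose(y, X @ evt.coefficients)
print('y is an exact linear function of X')
```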