Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Rémy Huet
AOS1
Commits
9074a236
Commit
9074a236
authored
Oct 06, 2021
by
Rémy Huet
💻
Browse files
TP5
parent
8373a5a5
Changes
2
Hide whitespace changes
Inline
Side-by-side
TP5/TP5.ipynb
0 → 100644
View file @
9074a236
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AOS 1 - TP5\n",
"## Regularization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge, LinearRegression, Lasso\n",
"from scipy.linalg import svd\n",
"import numpy as np\n",
"\n",
"from __future__ import print_function\n",
"from ipywidgets import interact, interactive, fixed, interact_manual\n",
"import ipywidgets as widgets\n",
"\n",
"# Modelization\n",
"from utils import Event\n",
"\n",
"n_features = 50\n",
"evt = Event(n_features=n_features, effective_rank=3, noise_level=1)\n",
"\n",
"# Generate samples\n",
"X, y = evt.sample(n_samples=100)\n",
"\n",
"print(X.shape)\n",
"print(y.shape)\n",
"\n",
"print(evt.coefficients)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Question 1\n",
"\n",
"Using an SVD, compute the singular values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"@interact\n",
"def compute_svd(eff_rank = widgets.IntSlider(min=1, max=50)):\n",
" evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)\n",
" X, y = evt.sample(n_samples=100)\n",
" U, S, V = svd(X)\n",
" plt.bar(range(len(S)), S)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see that the effective rank affects the number of significant singular values: it measures the dependence between features.\n",
"\n",
"### Question 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def compute_risk(model, dataX, datay, n_times, X0, evt):\n",
" # Expected value at X0 :\n",
" Y0 = X0 @ evt.coefficients\n",
" preds = []\n",
" # Repeat n_times the predictions & models\n",
" for i in range(n_times):\n",
" # Generate data\n",
" X = dataX[i]\n",
" y = datay[i]\n",
" # Train the model on the sample:\n",
" model.fit(X, y)\n",
" # Predict value at X0 aand add it to preds\n",
" preds.append(model.predict(X0))\n",
"\n",
" # Use preds to estimate bias, variance and risk of the estimator at X0\n",
" print(f'Expected Y0 : {Y0}')\n",
" print(f'Mean of predicted Ys : {np.mean(preds)}')\n",
"\n",
" bias = Y0 - np.mean(preds)\n",
" print(f'Bias : {bias}')\n",
" var = np.var(preds)\n",
" print(f'Var : {var}')\n",
" \n",
"\n",
"def generate_sample(n_times, n_samples, eff_rank):\n",
" # X0 is randomly generated\n",
" X0 = np.random.randn(1, n_features)\n",
"\n",
" # Generate event object\n",
" evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)\n",
" dataX = []\n",
" datay = []\n",
" for i in range(n_times):\n",
" X, y = evt.sample(n_samples)\n",
" dataX.append(X)\n",
" datay.append(y)\n",
" print(f'Mean y = {np.mean(datay)}')\n",
" return n_times, X0, dataX, datay, evt\n",
"\n",
"print(\"Data generation : \")\n",
"\n",
"w = interactive(generate_sample,\n",
" n_times = widgets.IntSlider(min=1, max=1000, value=500),\n",
" n_samples = widgets.IntSlider(min=1, max=200, value=50),\n",
" eff_rank = widgets.IntSlider(min=1, max=50, value=3),\n",
")\n",
"\n",
"display(w)\n",
"n_times, X0, dataX, datay, evt = w.result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def qu2(model, alpha, dataX, datay, n_times, X0, evt):\n",
" if model == 'linear':\n",
" model = LinearRegression()\n",
" print('Using linear model')\n",
" elif model == 'ridge':\n",
" print(f'Using ridge model with alpha = {alpha}')\n",
" model = Ridge(alpha=alpha) # TODO proposer modification\n",
" elif model == 'lasso':\n",
" print(f'Using lasso model with alpha = {alpha}')\n",
" model = Lasso(alpha=alpha)\n",
" else:\n",
" raise 'Error : no model defined'\n",
"\n",
" compute_risk(model, dataX, datay, n_times, X0, evt)\n",
" \n",
"\n",
"interact(qu2,\n",
" model = widgets.Dropdown(\n",
" options = ['linear', 'ridge', 'lasso'],\n",
" value='linear',\n",
" description='model'\n",
" ),\n",
" alpha = widgets.FloatSlider(min=0.01, max=5, step=0.01),\n",
" dataX = widgets.fixed(dataX),\n",
" datay = widgets.fixed(datay),\n",
" n_times = widgets.fixed(n_times),\n",
" X0 = widgets.fixed(X0),\n",
" evt = widgets.fixed(evt)\n",
")"
]
}
],
"metadata": {
"interpreter": {
"hash": "3abb0a1ef4892304d86bb3a3dfd052bcca35057beadba016173999c775e8d3ba"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('AOS1-QteoCFsS': pipenv)",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:markdown id: tags:
# AOS 1 - TP5
## Regularization
%% Cell type:code id: tags:
```
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from scipy.linalg import svd
import numpy as np
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
# Modelization
from utils import Event
# Number of features; also read as a global by compute_svd and generate_sample.
n_features = 50
# Data generator from the local utils module.
evt = Event(n_features=n_features, effective_rank=3, noise_level=1)
# Generate samples
X, y = evt.sample(n_samples=100)
# Sanity check: shapes of the design matrix and targets, then the true coefficients.
print(X.shape)
print(y.shape)
print(evt.coefficients)
```
%% Cell type:markdown id: tags:
### Question 1
Using an SVD, compute the singular values
%% Cell type:code id: tags:
```
import matplotlib.pyplot as plt
@interact
def compute_svd(eff_rank = widgets.IntSlider(min=1, max=50)):
    """Plot the singular values of a freshly sampled design matrix.

    Args:
        eff_rank: effective rank handed to ``Event``; ``interact`` binds it
            to an integer slider ranging from 1 to 50.
    """
    evt = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)
    # Only the design matrix is needed here; the targets are discarded.
    X, _ = evt.sample(n_samples=100)
    # compute_uv=False returns just the singular values and skips building
    # the U and V factors, which were never used.
    S = svd(X, compute_uv=False)
    plt.bar(range(len(S)), S)
```
%% Cell type:markdown id: tags:
We can see that the effective rank affects the number of significant singular values: it measures the dependence between features.
### Question 2
%% Cell type:code id: tags:
```
def compute_risk(model, dataX, datay, n_times, X0, evt):
    """Estimate bias, variance and risk of ``model``'s prediction at ``X0``.

    The model is refitted on each of the first ``n_times`` training sets and
    its prediction at ``X0`` is recorded; the spread of those predictions
    around the noise-free target gives the estimates.

    Args:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        dataX: sequence of design matrices, one per repetition.
        datay: sequence of target vectors, aligned with ``dataX``.
        n_times: number of repetitions to use (indexes ``dataX``/``datay``).
        X0: query point(s) passed to ``model.predict``.
        evt: object whose ``coefficients`` attribute holds the true weights.

    Returns:
        Tuple ``(bias, var, risk)`` where ``risk = bias ** 2 + var``.
    """
    # Expected (noise-free) value at X0
    Y0 = X0 @ evt.coefficients
    preds = []
    # Refit on every training set and record the prediction at X0
    for i in range(n_times):
        model.fit(dataX[i], datay[i])
        preds.append(model.predict(X0))

    # Use preds to estimate bias, variance and risk of the estimator at X0
    mean_pred = np.mean(preds)
    bias = Y0 - mean_pred
    var = np.var(preds)
    # Squared-error risk w.r.t. the noise-free target: bias^2 + variance
    risk = bias ** 2 + var

    print(f'Expected Y0 : {Y0}')
    print(f'Mean of predicted Ys : {mean_pred}')
    print(f'Bias : {bias}')
    print(f'Var : {var}')
    print(f'Risk : {risk}')
    return bias, var, risk
def generate_sample(n_times, n_samples, eff_rank):
    """Draw one random query point and ``n_times`` independent training sets.

    Args:
        n_times: number of training sets to draw.
        n_samples: number of rows requested from ``Event.sample`` per set.
        eff_rank: effective rank forwarded to ``Event``.

    Returns:
        Tuple ``(n_times, X0, dataX, datay, evt)`` where ``dataX`` and
        ``datay`` are lists of length ``n_times``.
    """
    # The query point is drawn before the Event is built; keeping this order
    # keeps seeded runs reproducible.
    query_point = np.random.randn(1, n_features)

    generator = Event(n_features=n_features, effective_rank=eff_rank, noise_level=1)

    draws = [generator.sample(n_samples) for _ in range(n_times)]
    design_matrices = [features for features, _ in draws]
    targets = [labels for _, labels in draws]

    print(f'Mean y = {np.mean(targets)}')
    return n_times, query_point, design_matrices, targets, generator
print("Data generation : ")
# Wrap generate_sample in a widget container: one integer slider per argument.
w = interactive(generate_sample,
    n_times = widgets.IntSlider(min=1, max=1000, value=500),
    n_samples = widgets.IntSlider(min=1, max=200, value=50),
    eff_rank = widgets.IntSlider(min=1, max=50, value=3),
)
display(w)
# NOTE(review): w.result is read once, when this line runs; moving the sliders
# afterwards does not refresh these names until the statement is executed again.
n_times, X0, dataX, datay, evt = w.result
```
%% Cell type:code id: tags:
```
def qu2(model, alpha, dataX, datay, n_times, X0, evt):
    """Build the requested estimator and report its bias/variance at ``X0``.

    Args:
        model: one of ``'linear'``, ``'ridge'`` or ``'lasso'``.
        alpha: regularization strength for Ridge and Lasso; unused for
            the plain linear model.
        dataX, datay, n_times, X0, evt: forwarded unchanged to
            ``compute_risk``.

    Raises:
        ValueError: if ``model`` is not one of the supported names.
    """
    if model == 'linear':
        estimator = LinearRegression()
        print('Using linear model')
    elif model == 'ridge':
        print(f'Using ridge model with alpha = {alpha}')
        estimator = Ridge(alpha=alpha)  # TODO: propose a modification
    elif model == 'lasso':
        print(f'Using lasso model with alpha = {alpha}')
        estimator = Lasso(alpha=alpha)
    else:
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception that names the offending value.
        raise ValueError(f'Error : no model defined for {model!r}')

    compute_risk(estimator, dataX, datay, n_times, X0, evt)
# Interactive front-end for qu2: the dropdown picks the estimator and the
# slider sets alpha; every other argument is passed through as a fixed value.
interact(qu2,
    model = widgets.Dropdown(
        options = ['linear', 'ridge', 'lasso'],
        value='linear',
        description='model'
    ),
    # alpha is not used by qu2 when model == 'linear'
    alpha = widgets.FloatSlider(min=0.01, max=5, step=0.01),
    # Data captured from the generation cell; not exposed as controls
    dataX = widgets.fixed(dataX),
    datay = widgets.fixed(datay),
    n_times = widgets.fixed(n_times),
    X0 = widgets.fixed(X0),
    evt = widgets.fixed(evt)
)
```
TP5/utils.py
0 → 100644
View file @
9074a236
from
scipy
import
linalg
import
numpy
as
np
class Event:
    """Random linear-regression data generator with a shaped singular profile.

    Design matrices are built as ``U @ S @ V.T`` where ``V`` and ``S`` are
    fixed at construction time and ``U`` is redrawn on every ``sample`` call.
    Targets are ``X @ coefficients + bias`` plus optional Gaussian noise.

    Args:
        n_features: number of columns of the generated design matrices.
        coefficients: true weight vector; drawn as ``10 * randn(n_features)``
            when ``None``.
        tail_strength: weight of the slowly decaying "tail" component of the
            singular profile (forced to 1 when ``effective_rank`` is None).
        effective_rank: scale of the singular-value decay; when ``None`` it
            is set to ``n_features`` and the singular indices are scaled by 10.
        bias: constant added to every generated target.
        noise_level: when not ``None``, Gaussian noise with standard deviation
            ``noise_level / 100 * ||coefficients||`` is added to the targets.
    """

    def __init__(self, n_features=10, coefficients=None, tail_strength=0.1,
                 effective_rank=None, bias=0.0, noise_level=None):
        self.n_features = n_features
        self.noise_level = noise_level
        self.effective_rank = effective_rank
        # Previously accepted but dropped; stored so sample() can apply it.
        self.bias = bias

        if coefficients is None:
            self.coefficients = 10 * np.random.randn(n_features)
        else:
            self.coefficients = coefficients

        # Q factor of a random square matrix, used as right singular vectors.
        v, _ = linalg.qr(np.random.randn(n_features, n_features),
                         mode='economic')
        self._v = v

        # Index of the singular values
        singular_ind = np.arange(n_features, dtype=np.float64)

        if self.effective_rank is None:
            # No low-rank part: keep only the tail component.
            tail_strength = 1
            self.effective_rank = n_features
            singular_ind = 10 * singular_ind

        # Build the singular profile by assembling signal and noise components
        low_rank = ((1 - tail_strength)
                    * np.exp(-1.0 * (singular_ind / self.effective_rank) ** 2))
        tail = tail_strength * np.exp(-0.1 * singular_ind / self.effective_rank)
        # Diagonal matrix carrying the singular profile.
        self._s = np.identity(n_features) * (low_rank + tail)

    def sample(self, n_samples=100):
        """Draw a design matrix and its targets.

        Args:
            n_samples: number of rows to return.

        Returns:
            Tuple ``(X, y)`` with ``X`` of shape ``(n_samples, n_features)``
            and ``y`` of length ``n_samples``.
        """
        # Draw at least n_features rows so the economic QR yields n_features
        # columns; the surplus rows are cut off below.
        u0 = np.random.randn(max(n_samples, self.n_features), self.n_features)
        u, _ = linalg.qr(u0, mode='economic')

        X = np.dot(np.dot(u, self._s), self._v.T)
        X = X[:n_samples, :]

        y = np.dot(X, self.coefficients) + self.bias

        if self.noise_level is not None:
            coeffs_norm = linalg.norm(self.coefficients)
            y += self.noise_level / 100 * coeffs_norm * np.random.randn(len(y))

        return X, y
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment