Commit 8e23baf1 authored by Mathilde Rineau

Add new file

parent 5d87da76
{
"cells": [
{
"cell_type": "markdown",
"id": "5c8980bd",
"metadata": {},
"source": [
"# AOS1 Problem\n",
"\n",
"## Mathilde Rineau, Remy Huet \n",
"## 17/10/2021\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9f152334",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(70000, 784)\n",
"(70000,)\n"
]
}
],
"source": [
"#We will work on the mnist data set\n",
"#We load it from fetch_openml\n",
"from sklearn.datasets import fetch_openml\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)\n",
"\n",
"#We print the caracteristics of X and Y\n",
"print(X.shape)\n",
"print(y.shape)"
]
},
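{
"cell_type": "code",
"execution_count": null,
"id": "a0b1c2d3",
"metadata": {},
"outputs": [],
"source": [
"#Added sanity-check sketch (not part of the original run): each row of X is a\n",
"#flattened 28x28 grayscale digit, so we can reshape one and display it with the\n",
"#matplotlib import above; y holds string labels, so it can go straight in the title\n",
"plt.imshow(X[0].reshape(28, 28), cmap='gray')\n",
"plt.title(\"label: \" + y[0])\n",
"plt.show()"
]
},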
{
"cell_type": "code",
"execution_count": 8,
"id": "4d3fa1c7",
"metadata": {},
"outputs": [],
"source": [
"#We divide the data set in two parts: train set and test set\n",
"#According to the recommended values the train set's size is 60000 and the test set's size is 10000\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, train_size=60000, test_size=10000)"
]
},
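{
"cell_type": "code",
"execution_count": null,
"id": "b1c2d3e4",
"metadata": {},
"outputs": [],
"source": [
"#Added check (not in the original run): confirm the split sizes and look at the\n",
"#class counts in the training set; train_test_split shuffles by default, so the\n",
"#per-class counts will vary slightly from run to run\n",
"import numpy as np\n",
"print(X_train.shape, X_test.shape)\n",
"labels, counts = np.unique(y_train, return_counts=True)\n",
"print(dict(zip(labels, counts)))"
]
},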
{
"cell_type": "code",
"execution_count": 9,
"id": "d809fc87",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SVC(C=10, degree=5, kernel='poly')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#First, we perform a SVC without preprocessing or improving in terms of accuracy or speed\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import accuracy_score\n",
"#we perform the default SVC, with the hyperparameter C=10 and a polynomial kernel of degree 5\n",
"#according to the recommandations\n",
"svc = SVC(C=10, kernel = 'poly', degree = 5)\n",
"svc.fit(X_train, y_train)"
]
},
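{
"cell_type": "code",
"execution_count": null,
"id": "c2d3e4f5",
"metadata": {},
"outputs": [],
"source": [
"#Hedged aside (an assumption, not from the original notebook): SVC is sensitive\n",
"#to feature scale and MNIST pixels lie in [0, 255], so dividing by 255 gives the\n",
"#usual [0, 1] rescaling, which typically speeds up kernel SVM training\n",
"X_train_scaled = X_train / 255.0\n",
"X_test_scaled = X_test / 255.0\n",
"print(X_train_scaled.min(), X_train_scaled.max())"
]
},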
{
"cell_type": "code",
"execution_count": 10,
"id": "8cb28178",
"metadata": {},
"outputs": [],
"source": [
"#We predict the values for our test set\n",
"y_pred = svc.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "c1248238",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 923 1 2 0 0 2 3 1 3 0]\n",
" [ 0 1157 4 1 0 1 1 3 2 0]\n",
" [ 7 10 925 4 0 0 5 2 1 0]\n",
" [ 3 7 3 1000 0 10 0 0 7 5]\n",
" [ 1 11 5 1 952 0 1 0 3 8]\n",
" [ 6 9 1 8 0 875 3 1 3 1]\n",
" [ 7 8 0 0 2 7 952 0 1 0]\n",
" [ 1 7 5 1 1 1 0 1070 2 11]\n",
" [ 3 8 4 8 0 10 0 2 905 4]\n",
" [ 2 6 2 5 6 3 0 11 6 957]]\n"
]
}
],
"source": [
"#We compute the confusion matrix \n",
"print(confusion_matrix(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ba4e38ac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.97 0.99 0.98 935\n",
" 1 0.95 0.99 0.97 1169\n",
" 2 0.97 0.97 0.97 954\n",
" 3 0.97 0.97 0.97 1035\n",
" 4 0.99 0.97 0.98 982\n",
" 5 0.96 0.96 0.96 907\n",
" 6 0.99 0.97 0.98 977\n",
" 7 0.98 0.97 0.98 1099\n",
" 8 0.97 0.96 0.96 944\n",
" 9 0.97 0.96 0.96 998\n",
"\n",
" accuracy 0.97 10000\n",
" macro avg 0.97 0.97 0.97 10000\n",
"weighted avg 0.97 0.97 0.97 10000\n",
"\n"
]
}
],
"source": [
"#We print the classification report\n",
"print(classification_report(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "947b0895",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.9716\n",
"Error rate: 2.839999999999998 %\n"
]
}
],
"source": [
"#We print the accuracy of the SVC and the error rate \n",
"print(\"Accuracy: \",accuracy_score(y_test, y_pred))\n",
"print(\"Error rate: \",(1-accuracy_score(y_test, y_pred))*100,\"%\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "81b09df7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"(8164, 784)\n",
"23.246173469387756\n",
"0.0\n"
]
}
],
"source": [
"#We then generated new training data by translating the resulting support vectors \n",
"#by one pixel in each of four directions\n",
"import numpy as np\n",
"print(svc.support_vectors_)\n",
"print(svc.support_vectors_.shape)\n",
"print(np.mean(svc.support_vectors_[0]))\n",
"print(svc.support_vectors_[0][1])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "0e648133",
"metadata": {},
"outputs": [],
"source": [
"def right_side_rescaling(support_vectors):\n",
" n,m = support_vectors.shape\n",
" #print(n,m)\n",
" support_vector_lin =support_vectors.reshape((-1, n*m))\n",
" #print(support_vector_lin.shape)\n",
" temp = support_vector_lin[0][0]\n",
" for i in range (n*m-2):\n",
" #print(support_vector_lin[0][i])\n",
" support_vector_lin[0][i] = support_vector_lin[0][i+1]\n",
" support_vector_lin[0][n*m-1] = temp \n",
" support_vectors = support_vector_lin.reshape(n,m)\n",
" return support_vectors"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "aa5535c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[2 3 4 5 1]\n",
" [2 3 4 4 1]]\n"
]
}
],
"source": [
"m = []\n",
"m.append([1,2,3,4,5])\n",
"m.append([1,2,3,4,5])\n",
"print(right_side_rescaling(np.array(m)))"
]
},
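{
"cell_type": "code",
"execution_count": null,
"id": "d3e4f5a6",
"metadata": {},
"outputs": [],
"source": [
"#Added sketch of the stated goal (an illustration, not the notebook's own code):\n",
"#translate each 28x28 support vector by one pixel up, down, left and right with\n",
"#np.roll, zeroing the row or column that wraps around; translate_images is a\n",
"#hypothetical helper name\n",
"def translate_images(vectors, direction):\n",
"    #vectors: (n, 784) array of flattened images\n",
"    images = vectors.reshape(-1, 28, 28)\n",
"    if direction == 'up':\n",
"        shifted = np.roll(images, -1, axis=1)\n",
"        shifted[:, -1, :] = 0\n",
"    elif direction == 'down':\n",
"        shifted = np.roll(images, 1, axis=1)\n",
"        shifted[:, 0, :] = 0\n",
"    elif direction == 'left':\n",
"        shifted = np.roll(images, -1, axis=2)\n",
"        shifted[:, :, -1] = 0\n",
"    else:\n",
"        shifted = np.roll(images, 1, axis=2)\n",
"        shifted[:, :, 0] = 0\n",
"    return shifted.reshape(-1, 784)\n",
"\n",
"#Stack the four translated copies; each copy keeps the label of its source\n",
"#support vector, recovered through svc.support_ (indices into the training set)\n",
"X_extra = np.vstack([translate_images(svc.support_vectors_, d)\n",
"                     for d in ('up', 'down', 'left', 'right')])\n",
"y_extra = np.tile(y_train[svc.support_], 4)\n",
"print(X_extra.shape, y_extra.shape)"
]
},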
{
"cell_type": "code",
"execution_count": null,
"id": "21db8ae3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bb8ab5a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}