# Baseline experiment: fit a polynomial-kernel SVC on the raw MNIST pixels
# (no preprocessing, no tuning for accuracy or speed) and inspect its errors.
from sklearn.datasets import fetch_openml
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Download MNIST from OpenML as plain arrays (as_frame=False) instead of a
# DataFrame.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

# Report the dimensions of the feature matrix and of the label vector.
print(X.shape, y.shape, sep="\n")

# Hold out 10000 samples for testing and keep 60000 for training, the sizes
# recommended for this data set.
# NOTE(review): no random_state is passed, so the split (and therefore every
# score computed from it) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=10000, train_size=60000
)

# Recommended baseline hyperparameters: C=10 with a degree-5 polynomial kernel.
svc = SVC(kernel="poly", degree=5, C=10)
svc.fit(X_train, y_train)

# Predict the labels of the held-out test set.
y_pred = svc.predict(X_test)

# Confusion matrix of the test-set predictions.
print(confusion_matrix(y_test, y_pred))
def right_side_rescaling(support_vectors):
    """Return a copy of ``support_vectors`` cyclically shifted by one element.

    The 2-D input is read as a single flat, row-major sequence. Every value
    moves one position towards the start of that sequence and the very first
    value wraps around to the very end; the result is then reshaped back to
    the input's ``(n, m)`` shape. This matches ``numpy.roll(a, -1)``.

    Compared with the previous element-by-element loop this version:
      * shifts *every* element (the old loop stopped one step early and left
        the second-to-last value stale, e.g. ``[2 3 4 4 1]`` instead of
        ``[2 3 4 5 1]``);
      * no longer overwrites the caller's array, so the original vectors
        remain available after generating the shifted ones;
      * accepts an empty array instead of raising ``IndexError``.

    NOTE(review): because the shift runs over the flattened matrix, the first
    value of each row ends up as the last value of the previous row. If each
    row is meant to be an independent image translated by one pixel, the
    shift should probably be applied per row / per image grid -- confirm the
    intended semantics with the authors.

    Args:
        support_vectors: 2-D NumPy array of shape ``(n, m)``.

    Returns:
        A new array with the same shape and dtype as ``support_vectors``.

    Raises:
        ValueError: If ``support_vectors`` is not 2-D (shape unpacking fails).
    """
    n, m = support_vectors.shape

    # Row-major flattening; copy so the caller's data is never modified.
    flat = support_vectors.reshape(-1)
    shifted = flat.copy()

    # With zero or one element a cyclic shift is the identity.
    if flat.size > 1:
        shifted[:-1] = flat[1:]
        shifted[-1] = flat[0]

    return shifted.reshape(n, m)