Commit af286816 authored by Lorys Hamadache

Near-final version

parent 7911df21
# Projet NF26 - P19 - UTC

Authors: Lorys Hamadache and Romain Creuzenet

### ToDos
-1. Add the new version of the report
-2. Add the new version of the presentation
-3. Clean up the old versions of the presentation + report
-4. Add the new version of main.py
-5. Clean up the CSVs in data and replace them with the Asos file?
-6. Make the project PUBLIC
-7. Send it to the professor
+1. Check the spelling
+2. Make the project public
+3. Confirm the new versions of the report + code
+4. Asos file vs CSVs?
+5. Send it to the professor

## How to use the project
• To install the project:
......
@@ -8,6 +8,8 @@ import warnings
 import re
 import os
 import random
+# parallelize
+from pyspark import SparkContext
 # Stats
 import statsmodels.graphics as stm_graphs
 import pandas as pd
@@ -16,7 +18,7 @@ import numpy as np
 # Graph map
 from mpl_toolkits.basemap import Basemap
 from pandas.plotting import register_matplotlib_converters
-from datetime import datetime, timedelta
+from datetime import datetime
 register_matplotlib_converters()
 warnings.filterwarnings("ignore")
@@ -27,7 +29,7 @@ def execute_query(query):
         yield row

-def ask_q(possibilities, text="Réponse : "):
+def ask_q(possibilities, text=">>> "):
     """Ask a question"""
     answer = None
     while answer not in possibilities:
@@ -35,7 +37,7 @@ def ask_q(possibilities, text="Réponse : "):
     return answer

-def ask_d(text="Réponse : "):
+def ask_d(text=">>> "):
     """Ask for a date"""
     print("Enter a date in the form YYYY-MM-DD HH:mm")
     print("Between {} and {}".format(START.strftime('%Y-%m-%d'), END.strftime('%Y-%m-%d')))
@@ -112,8 +114,7 @@ class Manager:
     def run(self):
         """Choose an objective"""
         # Initialisation
-        for i in "123":
-            os.makedirs(os.path.join(DIR_OUT, "objectif_{}".format(i)), exist_ok=True)
+        os.makedirs(DIR_OUT, exist_ok=True)

         # Choose objective
         print("Choose what you want to do")
@@ -146,6 +147,8 @@ class Manager:
         station = ask_q(stations)
         attr = chose_attr()
+
+        # Base
         ts = pd.Series()
         query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station.__repr__())
         for row in execute_query(query):
...@@ -162,20 +165,79 @@ class Manager: ...@@ -162,20 +165,79 @@ class Manager:
plt.plot(ts, label=attr) plt.plot(ts, label=attr)
plt.title("Donnees de {} pour la station : {}".format(attr, station)) plt.title("Donnees de {} pour la station : {}".format(attr, station))
plt.legend() plt.legend()
path = os.path.join(DIR_OUT, 'objectif_1', 'graph_{}_{}.png'.format(station, attr)) path = os.path.join(DIR_OUT, 'graph_{}_{}.png'.format(station, attr))
plt.savefig(path) plt.savefig(path)
plt.show() plt.show()
res = stm.tsa.seasonal_decompose(ts, freq=15, extrapolate_trend='freq')
#Initialisation SPARK
sc = SparkContext()
#INITIALISATION BY DAY
plt.figure(figsize=(25, 16))
axes = plt.subplot()
axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.xticks(rotation=90)
date_rng = pd.date_range(start='2011-01-01', end='2013-12-31', freq='D')
# Maximum of the Day
ts_max = pd.Series(index = date_rng)
query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station.__repr__())
gen_max = sc.parallelize(execute_query(query))
gen_max = gen_max.map(lambda line: ((getattr(line,"time")[0],getattr(line,"time")[1],getattr(line,"time")[2]), getattr(line,attr))).reduceByKey(lambda x,y: max(x,y)).collect()
for a,b in gen_max:
ts_max.loc[datetime(*list(a))] = b
ts_max = ts_max.sort_index()
ts_max = ts_max.interpolate()
plt.plot(ts_max , label="Maximum")
# Minimum of the Day
ts_min = pd.Series(index = date_rng)
query2 = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station.__repr__())
gen_min = sc.parallelize(execute_query(query2))
gen_min = gen_min.map(lambda line: ((getattr(line,"time")[0],getattr(line,"time")[1],getattr(line,"time")[2]), getattr(line,attr))).reduceByKey(lambda x,y: min(x,y)).collect()
for a,b in gen_min:
ts_min.loc[datetime(*list(a))] = b
ts_min = ts_min.sort_index()
ts_min = ts_min.interpolate()
plt.plot(ts_min , label="Minimum")
# Average of the Day
ts_avg = pd.Series(index = date_rng)
query3 = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station.__repr__())
gen_avg = sc.parallelize(execute_query(query2))
gen_avg = gen_avg.map(lambda line: ((getattr(line,"time")[0],getattr(line,"time")[1],getattr(line,"time")[2]), (getattr(line,attr),1))).reduceByKey(lambda x,y: (x[0] +y[0], x[1]+ y[1])).map(lambda x: (x[0],x[1][0]/x[1][1])).collect()
for a,b in gen_avg:
ts_avg.loc[datetime(*list(a))] = b
ts_avg = ts_avg.sort_index()
ts_avg = ts_avg.interpolate()
plt.plot(ts_avg , label="Moyenne")
# Global Plotting
plt.title("Donnees de {} pour la station : {}".format(attr, station))
plt.legend()
path = os.path.join(DIR_OUT, 'graph_{}_{}_byday.png'.format(station, attr))
plt.savefig(path)
plt.show()
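The three added Spark passes share one map/reduce pattern: key each Cassandra row on its (year, month, day) tuple, then `reduceByKey` with `max`, `min`, or a (sum, count) pair. A minimal, self-contained sketch of that pattern on hypothetical in-memory rows (toy data and a local SparkContext, not the project's actual schema):

```python
from pyspark import SparkContext

sc = SparkContext("local", "daily-agg-sketch")  # local mode, illustration only

# Hypothetical rows: ((year, month, day, hour, minute), value)
rows = [
    ((2011, 1, 1, 0, 0), 3.0),
    ((2011, 1, 1, 12, 0), 7.5),
    ((2011, 1, 2, 6, 30), 1.0),
]
rdd = sc.parallelize(rows)

# Daily maximum: key on (year, month, day), keep the larger value per key
daily_max = rdd.map(lambda r: (r[0][:3], r[1])).reduceByKey(max).collect()

# Daily mean: reduce (sum, count) pairs, then divide once per key
daily_avg = (rdd.map(lambda r: (r[0][:3], (r[1], 1)))
                .reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1]))
                .mapValues(lambda s: s[0] / s[1])
                .collect())

print(daily_max)  # e.g. [((2011, 1, 1), 7.5), ((2011, 1, 2), 1.0)] (order may vary)
print(daily_avg)  # e.g. [((2011, 1, 1), 5.25), ((2011, 1, 2), 1.0)]
sc.stop()
```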
+        res = stm.tsa.seasonal_decompose(ts_avg.dropna(), freq=365, extrapolate_trend='freq')
         res.plot()
-        path = os.path.join(DIR_OUT, 'objectif_1', 'decompose_{}_{}.png'.format(station, attr))
+        path = os.path.join(DIR_OUT, 'decompose_{}_{}.png'.format(station, attr))
         plt.savefig(path)
         plt.show()

-        stm_graphs.tsaplots.plot_acf(ts, lags=30)
+        stm_graphs.tsaplots.plot_acf(ts_avg.dropna(), lags=365)
-        path = os.path.join(DIR_OUT, 'objectif_1', 'acf_{}_{}.png'.format(station, attr))
+        path = os.path.join(DIR_OUT, 'acf_{}_{}.png'.format(station, attr))
         plt.savefig(path)
         plt.show()
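With `freq=365`, the decomposition treats the interpolated daily mean as one yearly cycle instead of the 15-sample period used before on raw observations. A small sketch of the same call on synthetic daily data (illustrative; note that statsmodels 0.11+ renames `freq` to `period`):

```python
import numpy as np
import pandas as pd
import statsmodels.api as stm

# Synthetic daily series with a one-year seasonality, 2011-2013
idx = pd.date_range('2011-01-01', '2013-12-31', freq='D')
ts = pd.Series(10 + 5 * np.sin(2 * np.pi * idx.dayofyear / 365.25), index=idx)

# Same decomposition as above; on statsmodels >= 0.11 use period=365 instead
res = stm.tsa.seasonal_decompose(ts, freq=365, extrapolate_trend='freq')
res.plot()
```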

     def map(self):
         self.table = "TABLE_TIME"
@@ -218,7 +280,7 @@ class Manager:
         plt.title(title)
         for elt in ' :-':
             title = title.replace(elt, '_')
-        path = os.path.join(DIR_OUT, 'objectif_2', title.lower() + '.png')
+        path = os.path.join(DIR_OUT, title.lower() + '.png')
         plt.savefig(path)
         plt.show()
@@ -259,31 +321,25 @@ class Manager:
         print("Enter the desired number of clusters")
         nb_cluster = ask_int()
+        # Calc of mean: one filtered range query instead of one query per minute
+        query = "SELECT station, lon, lat, {attr} FROM {table} WHERE time >= {begin} AND time <= {end} " \
+                "ALLOW FILTERING".format(
+            attr=", ".join(ATTRIBUTS.keys()),
+            table=self.table,
+            begin=date_begin,
+            end=date_end
+        )
         stations = {}  # station: {'nb': 3, 'attr1': 5, 'attr2': 7, ..., 'lon': 3.27, 'lat': 12}
-        datetime_begin = datetime(*list(date_begin))  # Convert to datetime
-        datetime_end = datetime(*list(date_end))  # Convert to datetime
-        while datetime_begin <= datetime_end:
-            print("Data retrieved for {}".format(datetime_begin.strftime("%Y-%m-%d %H:%M")), end="\r")
-            # Calc of mean
-            query = "SELECT station, lon, lat, {attr} FROM {table} WHERE time = {date}".format(
-                attr=", ".join(ATTRIBUTS.keys()),
-                table=self.table,
-                date=(datetime_begin.year, datetime_begin.month, datetime_begin.day, datetime_begin.hour,
-                      datetime_begin.minute)
-            )
-            for row in execute_query(query):
-                if None in [row.station, row.lon, row.lat] + [getattr(row, attr) for attr in ATTRIBUTS.keys()]:
-                    continue
-                if row.station not in stations:
-                    stations[row.station] = {'nb': 0, 'lon': row.lon, 'lat': row.lat,
-                                             **{key: 0 for key in ATTRIBUTS.keys()}}
-                for attr in ATTRIBUTS.keys():
-                    stations[row.station][attr] += getattr(row, attr)
-                stations[row.station]['nb'] += 1
-            datetime_begin += timedelta(minutes=1)
+        for row in execute_query(query):
+            if None in [row.station, row.lon, row.lat] + [getattr(row, attr) for attr in ATTRIBUTS.keys()]:
+                continue
+            if row.station not in stations:
+                stations[row.station] = {'nb': 0, 'lon': row.lon, 'lat': row.lat,
+                                         **{key: 0 for key in ATTRIBUTS.keys()}}
+            for attr in ATTRIBUTS.keys():
+                stations[row.station][attr] += getattr(row, attr)
+            stations[row.station]['nb'] += 1
         for value in stations.values():
             for attr in ATTRIBUTS.keys():
                 value[attr] = value[attr] / value['nb']
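The rewrite computes each station's per-attribute mean in a single pass: a running (sum, count) per station, divided once at the end. A minimal sketch of the same accumulation on hypothetical plain-dict rows (the real code reads Cassandra rows and `ATTRIBUTS` keys; station names and values here are invented):

```python
# Hypothetical rows standing in for Cassandra query results
rows = [
    {"station": "LFPG", "lon": 2.55, "lat": 49.01, "tmpf": 50.0},
    {"station": "LFPG", "lon": 2.55, "lat": 49.01, "tmpf": 54.0},
    {"station": "LFBO", "lon": 1.37, "lat": 43.63, "tmpf": 61.0},
]
attrs = ["tmpf"]  # stand-in for ATTRIBUTS.keys()

stations = {}
for row in rows:
    if any(row[key] is None for key in ["station", "lon", "lat"] + attrs):
        continue  # skip incomplete rows, as the original does
    if row["station"] not in stations:
        stations[row["station"]] = {"nb": 0, "lon": row["lon"], "lat": row["lat"],
                                    **{a: 0.0 for a in attrs}}
    for a in attrs:
        stations[row["station"]][a] += row[a]  # running sum per attribute
    stations[row["station"]]["nb"] += 1        # running count

for value in stations.values():
    for a in attrs:
        value[a] /= value["nb"]  # divide once: sum -> mean

print(stations)  # {'LFPG': {'nb': 2, ..., 'tmpf': 52.0}, 'LFBO': {...}}
```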
@@ -295,8 +351,6 @@ class Manager:
             for _ in range(nb_cluster)
         ]
-        print()
-        print("Clustering...")
         while old_centroids != new_centroids:
             old_centroids = new_centroids
             data = [
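The `while` loop above is the classic Lloyd k-means iteration: reassign points to their nearest centroid, recompute centroids, and stop once they no longer change. A compact, self-contained sketch of that loop on hypothetical 2-D points (illustrative only; the project clusters stations on their averaged attributes):

```python
import random

def kmeans(points, k):
    """Lloyd's algorithm: iterate until the centroids stop changing."""
    new_centroids = random.sample(points, k)  # random initial centroids
    old_centroids = None
    while old_centroids != new_centroids:     # same stop test as above
        old_centroids = new_centroids
        clusters = [[] for _ in range(k)]
        for p in points:
            # assign each point to its nearest centroid (squared distance)
            i = min(range(k),
                    key=lambda c: sum((a - b) ** 2 for a, b in zip(p, old_centroids[c])))
            clusters[i].append(p)
        # recompute each centroid as the mean of its cluster
        new_centroids = [
            tuple(sum(dim) / len(cl) for dim in zip(*cl)) if cl else old_centroids[i]
            for i, cl in enumerate(clusters)
        ]
    return new_centroids, clusters

centroids, clusters = kmeans([(0, 0), (0, 1), (10, 10), (10, 11)], k=2)
print(centroids)  # e.g. [(0.0, 0.5), (10.0, 10.5)]
```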
@@ -370,7 +424,7 @@ class Manager:
         plt.title(title)
         for elt in ' :-':
             title = title.replace(elt, '_')
-        path = os.path.join(DIR_OUT, 'objectif_3', title.lower() + '.png')
+        path = os.path.join(DIR_OUT, title.lower() + '.png')
         plt.savefig(path)
         plt.show()
......