"""File to execute to show results"""

# Basic
import os
import random
import re
import warnings
from datetime import datetime, timedelta

# Stats / plotting
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as stm
import statsmodels.graphics as stm_graphs
# Graph map
from mpl_toolkits.basemap import Basemap
from pandas.plotting import register_matplotlib_converters

# Data
from parameters import SESSION, DIR_OUT, START, END, ATTRIBUTS

register_matplotlib_converters()
warnings.filterwarnings("ignore")

# Expected user input: "YYYY-MM-DD HH:mm". The group names are the keys read
# back by ask_d().
_DATE_FIELDS = ("year", "month", "day", "hour", "minute")
_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) "
    r"(?P<hour>\d{2}):(?P<minute>\d{2})"
)

# Characters replaced by "_" when a plot title is turned into a file name.
_TITLE_TRANSLATION = str.maketrans(" :-", "___")


def execute_query(query):
    """Run *query* through ``SESSION.execute`` and yield the rows one by one."""
    yield from SESSION.execute(query)


def ask_q(possibilities, text="Réponse : "):
    """Prompt with *text* until the typed answer is in *possibilities*.

    :param possibilities: container of accepted answers (strings)
    :param text: prompt passed to ``input``
    :return: the accepted answer
    """
    answer = None
    while answer not in possibilities:
        answer = input(text)
    return answer


def ask_d(text="Réponse : "):
    """Prompt for a date strictly between ``START`` and ``END``.

    The instructions are printed again after every answer that has the right
    format but is not a real date or is out of range.

    :param text: prompt passed to ``input``
    :return: tuple ``(year, month, day, hour, minute)`` of ints
    """
    while True:
        print("Entrez une date sous la forme YYYY-MM-DD HH:mm")
        print("Comprise entre {} et {}".format(START.strftime('%Y-%m-%d'), END.strftime('%Y-%m-%d')))

        match = None
        while match is None:
            match = _DATE_PATTERN.match(input(text))

        result = tuple(int(match.group(name)) for name in _DATE_FIELDS)
        try:
            date = datetime(*result)
        except ValueError:
            # Well-formed but impossible date (e.g. month 13): ask again.
            continue
        if START < date < END:
            return result


def chose_attr():
    """List the entries of ``ATTRIBUTS`` and return the code the user picks."""
    # Search element
    print("Choisissez un élément parmis les suivant :")
    for code, text in ATTRIBUTS.items():
        print("\t-", text, ":", code)
    return ask_q(ATTRIBUTS.keys())


def ask_int(text=">>> "):
    """Prompt with *text* until a non-negative integer is typed.

    :return: the answer converted with ``int``
    """
    answer = ""
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() refuses.
    while not answer.isdecimal():
        answer = input(text)
    return int(answer)


def generate_color(i):
    """Return a matplotlib colour for index *i*.

    The first indices map to the single-letter colours of ``'bgrcmykw'``;
    beyond those a random ``#rrggbb`` colour is drawn.
    """
    colors = 'bgrcmykw'
    if i < len(colors):
        return colors[i]
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))


def initialisation_centroid(data):
    """Generate one random centroid.

    For each attribute of ``ATTRIBUTS`` a random integer is drawn between the
    (integer-truncated) minimum and maximum found in *data*.

    :param data: station : {attr1: 1, attr : 2...}; must not be empty
    :return: {attr1: 1, attr : 2...}
    """
    centroid = {}
    for attr in ATTRIBUTS.keys():
        all_attr = [elt[attr] for elt in data.values()]
        centroid[attr] = random.randint(int(min(all_attr)), int(max(all_attr)))
    return centroid


def _closest_centroid(point, centroids):
    """Return the index of the centroid nearest to *point* (squared Euclidean
    distance over the ``ATTRIBUTS`` keys; the first one wins on ties)."""
    distances = [
        sum((centroid[attr] - point[attr]) ** 2 for attr in ATTRIBUTS.keys())
        for centroid in centroids
    ]
    return distances.index(min(distances))


def _random_centroids(stations, nb_cluster):
    """Draw *nb_cluster* independent random centroids for *stations*."""
    return [initialisation_centroid(stations) for _ in range(nb_cluster)]


def _kmeans(stations, nb_cluster):
    """Cluster *stations* into *nb_cluster* groups and return the centroids.

    Iterates until two successive centroid lists are equal. Centroids are
    rounded to 2 decimals so that this equality can actually be reached.
    Whenever a cluster ends up empty, all centroids are drawn again.

    NOTE(review): termination relies on the random re-draws eventually giving
    every cluster at least one station; callers must ensure
    ``1 <= nb_cluster <= len(stations)``.
    """
    old_centroids = None
    new_centroids = _random_centroids(stations, nb_cluster)
    while old_centroids != new_centroids:
        old_centroids = new_centroids
        sums = [
            {**{attr: 0 for attr in ATTRIBUTS.keys()}, 'nb': 0}
            for _ in range(nb_cluster)
        ]
        # could be parallelized
        for value_station in stations.values():
            i = _closest_centroid(value_station, old_centroids)
            for attr in ATTRIBUTS.keys():
                sums[i][attr] += value_station[attr]
            sums[i]['nb'] += 1
        # end of the parallelizable part

        if any(elt['nb'] == 0 for elt in sums):
            # cluster empty: do it again
            new_centroids = _random_centroids(stations, nb_cluster)
        else:
            new_centroids = [
                {attr: float("{0:.2f}".format(elt[attr] / elt['nb'])) for attr in ATTRIBUTS.keys()}
                for elt in sums
            ]
    return new_centroids


def _title_to_filename(title):
    """Turn a plot title into a lower-case ``.png`` file name."""
    return title.translate(_TITLE_TRANSLATION).lower() + '.png'


class Manager:
    """Interactive entry point offering the three objectives (history of a
    station, map of an indicator, clustering of the stations)."""

    table = None  # table name used by the queries of the running objective

    # for map
    # data has a precision of 4 decimals
    x_min = -18.42
    x_max = 10.35
    y_min = 25.281898
    y_max = 48.08

    def run(self):
        """Create the output directories, ask for an objective and run it."""
        # Initialisation
        for i in "123":
            os.makedirs(os.path.join(DIR_OUT, "objectif_{}".format(i)), exist_ok=True)

        # Chose objective
        print("Choisissez ce que vous voulez faire")
        print("\t1 - Pour un point donné de l’espace, je veux pouvoir avoir un historique du passé")
        print("\t2 - À un instant donné je veux pouvoir obtenir une carte me représentant n’importe quel indicateur")
        print("\t3 - Pour une période de temps donnée, je veux pouvoir obtenir clusteriser l’espace, et représenter "
              "cette clusterisation")
        decision = {
            "1": "historic",
            "2": "map",
            "3": "cluster"
        }
        answer = ask_q(decision.keys())
        getattr(self, decision[answer])()

    def _draw_base_map(self):
        """Open a 14x14 figure and draw the background map bounded by the
        class-level ``x_min/x_max/y_min/y_max``; return the ``Basemap``."""
        plt.figure(figsize=(14, 14))
        the_map = Basemap(
            projection='mill',
            llcrnrlat=self.y_min,
            llcrnrlon=self.x_min,
            urcrnrlat=self.y_max,
            urcrnrlon=self.x_max,
            resolution='l'
        )
        # draw coastlines, country boundaries, fill continents.
        the_map.drawcoastlines(linewidth=0.25)
        the_map.drawcountries(linewidth=0.25)
        the_map.fillcontinents(color='coral', lake_color='aqua')
        # draw the edge of the map projection region (the projection limb)
        the_map.drawmapboundary(fill_color='aqua')
        # draw lat/lon grid lines every 30 degrees.
        the_map.drawmeridians(np.arange(0, 360, 30))
        the_map.drawparallels(np.arange(-90, 90, 30))
        return the_map

    def historic(self):
        """Objective 1: plot the history of one attribute for one station.

        Saves three figures under ``DIR_OUT/objectif_1`` (raw series,
        seasonal decomposition, autocorrelation) and shows each of them.
        """
        self.table = "TABLE_SPACE"
        print("=== Choix 1 : Historique ===")

        # Search station
        stations = []
        print("Choisissez une station parmis celles-ci:")
        query = "SELECT DISTINCT station FROM {}".format(self.table)
        for i, row in enumerate(execute_query(query), 1):
            end = "\n" if i % 3 == 0 else ""  # three stations per line
            print("\t", row.station, end=end)
            stations.append(row.station)
        print()
        station = ask_q(stations)
        attr = chose_attr()

        # `station` comes from the list above and `attr` from ATTRIBUTS, so
        # neither is free-form user text.
        query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, repr(station))
        # Collect first, build the Series once: growing a Series row by row
        # reallocates it on every insertion.
        samples = {}
        for row in execute_query(query):
            value = getattr(row, attr)
            if value is None:
                continue
            samples[datetime(*row.time)] = value
        if not samples:
            print("Aucune donnée disponible pour cette station")
            return
        ts = pd.Series(samples, dtype=float)

        plt.figure(figsize=(25, 16))
        axes = plt.subplot()
        axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
        plt.xticks(rotation=90)
        plt.plot(ts, label=attr)
        plt.title("Donnees de {} pour la station : {}".format(attr, station))
        plt.legend()
        path = os.path.join(DIR_OUT, 'objectif_1', 'graph_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        # NOTE(review): newer statsmodels releases name this argument
        # `period` instead of `freq` - confirm against the installed version.
        res = stm.tsa.seasonal_decompose(ts, freq=15, extrapolate_trend='freq')
        res.plot()
        path = os.path.join(DIR_OUT, 'objectif_1', 'decompose_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        stm_graphs.tsaplots.plot_acf(ts, lags=30)
        path = os.path.join(DIR_OUT, 'objectif_1', 'acf_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

    def map(self):
        """Objective 2: draw the value of one attribute for every station at
        a given instant, save the map under ``DIR_OUT/objectif_2`` and show it.

        When no row matches the requested instant, the times available on
        that day are listed afterwards.
        """
        self.table = "TABLE_TIME"
        print("=== Choix 2 : Map ===")
        date = ask_d()
        attr = chose_attr()

        the_map = self._draw_base_map()

        date_ok = False  # becomes True as soon as one usable row is found
        query = "SELECT station, lon, lat, {} FROM {} WHERE time={}".format(attr, self.table, date)
        for row in execute_query(query):
            value = getattr(row, attr)
            if row.station is None or value is None:
                continue
            date_ok = True
            x, y = the_map(row.lon, row.lat)
            plt.plot(x, y, 'go')
            plt.annotate(round(value, 1), (x, y))

        title = "Map {} du {}".format(attr, datetime(*date).strftime('%Y-%m-%d %H:%M'))
        plt.title(title)
        path = os.path.join(DIR_OUT, 'objectif_2', _title_to_filename(title))
        plt.savefig(path)
        plt.show()

        # If date is wrong, show some dates for this day
        if not date_ok:
            date_begin = date[:3] + (0, 0)  # set hours and minutes at 0
            date_end = date[:3] + (23, 59)
            print("Seules ces heures sont disponibles pour ce jour")
            query = "SELECT DISTINCT time FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING".format(
                self.table,
                date_begin,
                date_end,
            )
            for row in execute_query(query):
                resp = list(row.time)
                print(str(resp[3]).zfill(2) + ":" + str(resp[4]).zfill(2), end=" - ")
            print()  # terminate the line of listed hours

    def _station_means(self, datetime_begin, datetime_end):
        """Return, per station, the mean of every attribute over the period.

        One query is issued per minute between the two datetimes (inclusive).
        Rows with a missing station, position or attribute are ignored.

        :return: station: {'nb': 3, 'attr1': 5, 'attr2': 7, ..., 'lon': 3.27, 'lat': 12}
        """
        stations = {}
        current = datetime_begin
        while current <= datetime_end:
            print("Données récupérée pour {}".format(current.strftime("%Y-%m-%d %H:%M")), end="\r")
            query = "SELECT station, lon, lat, {attr} FROM {table} WHERE time = {date}".format(
                attr=", ".join(ATTRIBUTS.keys()),
                table=self.table,
                date=(current.year, current.month, current.day, current.hour, current.minute)
            )
            for row in execute_query(query):
                values = {attr: getattr(row, attr) for attr in ATTRIBUTS.keys()}
                if None in [row.station, row.lon, row.lat] + list(values.values()):
                    continue
                if row.station not in stations:
                    stations[row.station] = {'nb': 0, 'lon': row.lon, 'lat': row.lat,
                                             **{key: 0 for key in ATTRIBUTS.keys()}}
                entry = stations[row.station]
                for attr, value in values.items():
                    entry[attr] += value
                entry['nb'] += 1
            current += timedelta(minutes=1)

        # Turn the accumulated sums into means.
        for entry in stations.values():
            for attr in ATTRIBUTS.keys():
                entry[attr] = entry[attr] / entry['nb']
        return stations

    def cluster(self):
        """Objective 3: cluster the stations on the mean of their attributes
        over a period, save the resulting map under ``DIR_OUT/objectif_3``
        and show it."""
        self.table = "TABLE_TIME"

        # Ask information from user
        print("=== Choix 3 : CLUSTER ===")
        print("Vous allez devoir choisir une période de temps. On considéra la moyenne des attributs sur cette "
              "période de temps")
        date_begin = date_end = None
        while date_begin is None or date_begin >= date_end:
            print("La date de départ :")
            date_begin = ask_d()
            print("Entrez la date de fin :")
            date_end = ask_d()
        print("Entrez le nombre de cluster voulus")
        nb_cluster = ask_int()
        while nb_cluster < 1:
            # Zero clusters cannot be computed; ask again.
            nb_cluster = ask_int()

        stations = self._station_means(datetime(*date_begin), datetime(*date_end))
        print()
        if not stations:
            print("Aucune donnée disponible pour cette période")
            return
        if nb_cluster > len(stations):
            # With more clusters than stations one cluster is always empty
            # and the clustering would never converge.
            nb_cluster = len(stations)
            print("Nombre de clusters réduit à {} (nombre de stations disponibles)".format(nb_cluster))

        print("Clusterisation...")
        centroids = _kmeans(stations, nb_cluster)

        # configuration map
        the_map = self._draw_base_map()
        colors = [generate_color(i) for i in range(nb_cluster)]

        # Add all points
        for station, value in stations.items():
            # Analyse the point
            i = _closest_centroid(value, centroids)
            # Add the point
            x, y = the_map(value['lon'], value['lat'])
            the_map.plot(x, y, marker=".", color=colors[i])
            plt.annotate("{}".format(station), (x, y), color=colors[i])

        title = "{nb_cluster} clusters du {begin} au {end}".format(
            nb_cluster=nb_cluster,
            begin=datetime(*date_begin).strftime('%Y-%m-%d %H:%M'),
            end=datetime(*date_end).strftime('%Y-%m-%d %H:%M')
        )
        plt.title(title)
        path = os.path.join(DIR_OUT, 'objectif_3', _title_to_filename(title))
        plt.savefig(path)
        plt.show()


if __name__ == "__main__":
    Manager().run()