main.py 13.2 KB
Newer Older
Romain Creuzenet's avatar
Romain Creuzenet committed
1
"""File to execute to show results"""
Romain Creuzenet's avatar
Romain Creuzenet committed
2
# Data
Romain Creuzenet's avatar
Romain Creuzenet committed
3
from parameters import SESSION, DIR_OUT, START, END, ATTRIBUTS
Romain Creuzenet's avatar
Romain Creuzenet committed
4
# Basic
{}'s avatar
{} committed
5
import matplotlib.dates as mdates
Romain Creuzenet's avatar
Romain Creuzenet committed
6
import matplotlib.pyplot as plt
Romain Creuzenet's avatar
Romain Creuzenet committed
7
import warnings
8
9
import re
import os
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
10
import random
Romain Creuzenet's avatar
Romain Creuzenet committed
11
# Stats
{}'s avatar
{} committed
12
13
14
import statsmodels.graphics as stm_graphs
import pandas as pd 
import statsmodels.api as stm
Romain Creuzenet's avatar
Romain Creuzenet committed
15
16
17
18
import numpy as np
# Graph map
from mpl_toolkits.basemap import Basemap
from pandas.plotting import register_matplotlib_converters
Romain Creuzenet's avatar
Romain Creuzenet committed
19
from datetime import datetime, timedelta
{}'s avatar
{} committed
20

Romain Creuzenet's avatar
Romain Creuzenet committed
21
22
# Register pandas' date/time converters with matplotlib before any plotting
register_matplotlib_converters()
# Install a blanket "ignore" filter: no warning is shown for the rest of the run
warnings.filterwarnings("ignore")
Romain Creuzenet's avatar
Romain Creuzenet committed
23
24
25
26
27
28
29


def execute_query(query):
    """Lazily yield every row produced by running ``query`` on ``SESSION``.

    :param query: query text handed unchanged to ``SESSION.execute``
    :return: generator over the rows of the result
    """
    yield from SESSION.execute(query)


Romain Creuzenet's avatar
Romain Creuzenet committed
30
def ask_q(possibilities, text="Réponse : "):
    """Prompt the user until the answer is one of the allowed values.

    :param possibilities: iterable of accepted answers (compared with ``in``)
    :param text: prompt displayed by ``input``
    :return: the accepted answer
    :raises ValueError: if ``possibilities`` is empty, because no answer could
        ever be accepted and the prompt would otherwise loop forever
    """
    # Materialise once so that views and one-shot iterables are tested consistently
    choices = list(possibilities)
    if not choices:
        raise ValueError("ask_q() needs at least one possible answer")

    answer = None
    while answer not in choices:
        answer = input(text)
    return answer

{}'s avatar
{} committed
37

Romain Creuzenet's avatar
Romain Creuzenet committed
38
def ask_d(text="Réponse : "):
    """Prompt for a date and time until a valid one inside the data range is given.

    The instructions are printed before every attempt. An answer is accepted
    when it is exactly of the form ``YYYY-MM-DD HH:mm`` (surrounding blanks are
    ignored), is a real calendar date, and lies strictly between ``START`` and
    ``END``.

    :param text: prompt displayed by ``input``
    :return: tuple of ints ``(year, month, day, hour, minute)``
    """
    date_parser = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{2}):(?P<minute>\d{2})")
    fields = ('year', 'month', 'day', 'hour', 'minute')

    # Loop instead of recursing, so repeated bad answers cannot exhaust the call stack
    while True:
        print("Entrez une date sous la forme YYYY-MM-DD HH:mm")
        print("Comprise entre {} et {}".format(START.strftime('%Y-%m-%d'), END.strftime('%Y-%m-%d')))

        match = None
        while match is None:
            # fullmatch: reject trailing garbage such as "2019-01-01 10:599"
            match = date_parser.fullmatch(input(text).strip())

        result = tuple(int(match.group(field)) for field in fields)
        try:
            date = datetime(*result)
        except ValueError:
            # Well-formed but impossible date (month 13, hour 25, ...): ask again
            continue
        if START < date < END:
            return result
Romain Creuzenet's avatar
Romain Creuzenet committed
60

Romain Creuzenet's avatar
Romain Creuzenet committed
61

62
def chose_attr():
    """Print the attributes listed in ``ATTRIBUTS`` and ask the user to pick one.

    Each entry is shown as "label : code"; the user must type a code.

    :return: the chosen key of ``ATTRIBUTS``
    """
    print("Choisissez un élément parmis les suivant :")
    for code, text in ATTRIBUTS.items():
        print("\t-", text, ":", code)
    return ask_q(ATTRIBUTS.keys())
Romain Creuzenet's avatar
Romain Creuzenet committed
69
70


Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
71
72
def ask_int(text=">>> "):
    """Prompt the user until a non-negative integer is entered.

    :param text: prompt displayed by ``input``
    :return: the entered value as an ``int``
    """
    while True:
        answer = input(text).strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() refuses with a ValueError.
        if answer.isdecimal():
            return int(answer)


Romain Creuzenet's avatar
Romain Creuzenet committed
79
80
81
82
83
84
def generate_color(i):
    """Return a matplotlib colour for the ``i``-th series.

    The first indices map onto fixed one-letter colour codes; once those are
    used up, a random ``#rrggbb`` hexadecimal colour is drawn.

    :param i: index of the series
    :return: colour string
    """
    palette = 'bgrcmykw'
    return palette[i] if i < len(palette) else "#{:06x}".format(random.randint(0, 0xFFFFFF))
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
85
86


Romain Creuzenet's avatar
Romain Creuzenet committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
def initialisation_centroid(data):
    """
        Draw one random centroid.

        For every attribute of ``ATTRIBUTS`` a random integer is picked between
        the smallest and the largest value seen in ``data`` (both bounds are
        truncated to int first).
    :param data: station : {attr1: 1, attr : 2...}
    :return: {attr1: 1, attr : 2...}
    """
    def draw(attr):
        observed = [station[attr] for station in data.values()]
        return random.randint(int(min(observed)), int(max(observed)))

    return {attr: draw(attr) for attr in ATTRIBUTS}


Romain Creuzenet's avatar
Romain Creuzenet committed
102
103
104
class Manager:
    """Interactive menu that runs one of the three analyses and saves its figures.

    ``run`` asks which objective to execute; each objective queries the
    database through ``execute_query``, draws with matplotlib and stores the
    resulting picture under ``DIR_OUT/objectif_<n>``.
    """

    table = None  # table name used by the objective currently running

    # Bounding box of the maps: x is the longitude, y the latitude
    # data has a precision of 4 decimals
    x_min = -18.42
    x_max = 10.35
    y_min = 25.281898
    y_max = 48.08

    def run(self):
        """Create the output folders, ask which objective to run and run it."""
        # Initialisation
        for i in "123":
            os.makedirs(os.path.join(DIR_OUT, "objectif_{}".format(i)), exist_ok=True)

        # Choose the objective
        print("Choisissez ce que vous voulez faire")
        print("\t1 - Pour un point donné de l’espace, je veux pouvoir avoir un historique du passé")
        print("\t2 - À un instant donné je veux pouvoir obtenir une carte me représentant n’importe quel indicateur")
        print("\t3 - Pour une période de temps donnée, je veux pouvoir obtenir clusteriser l’espace, et représenter "
              "cette clusterisation")
        decision = {
            "1": "historic",
            "2": "map",
            "3": "cluster"
        }
        answer = ask_q(decision.keys())
        # The menu maps each answer to the name of the method implementing it
        getattr(self, decision[answer])()

    @staticmethod
    def _title_to_filename(title):
        """Turn a figure title into a lower-case ``.png`` file name without blanks, ':' or '-'."""
        for char in ' :-':
            title = title.replace(char, '_')
        return title.lower() + '.png'

    @staticmethod
    def _nearest_centroid(centroids, point, attrs):
        """Return the index of the centroid closest to ``point``.

        The distance is the squared Euclidean distance over ``attrs``; on a tie
        the first centroid wins.
        """
        distances = [
            sum((centroid[attr] - point[attr]) ** 2 for attr in attrs)
            for centroid in centroids
        ]
        return distances.index(min(distances))

    def _draw_base_map(self):
        """Open a new figure, draw the background map of the bounding box and return the Basemap."""
        plt.figure(figsize=(14, 14))
        the_map = Basemap(
            projection='mill',
            llcrnrlat=self.y_min,
            llcrnrlon=self.x_min,
            urcrnrlat=self.y_max,
            urcrnrlon=self.x_max,
            resolution='l'
        )
        # draw coastlines, country boundaries, fill continents.
        the_map.drawcoastlines(linewidth=0.25)
        the_map.drawcountries(linewidth=0.25)
        the_map.fillcontinents(color='coral', lake_color='aqua')
        # draw the edge of the map projection region (the projection limb)
        the_map.drawmapboundary(fill_color='aqua')
        # draw lat/lon grid lines every 30 degrees.
        the_map.drawmeridians(np.arange(0, 360, 30))
        the_map.drawparallels(np.arange(-90, 90, 30))
        return the_map

    def _station_means(self, date_begin, date_end, attrs):
        """Average every attribute per station over a period.

        One query is sent for each minute between the two dates (both
        included). Rows with a missing station, position or attribute are
        ignored.

        :param date_begin: ``(year, month, day, hour, minute)`` of the first minute
        :param date_end: ``(year, month, day, hour, minute)`` of the last minute
        :param attrs: list of attribute names to average
        :return: {station: {'nb': rows used, 'lon': ..., 'lat': ..., attr: mean, ...}}
        """
        stations = {}
        current = datetime(*date_begin)
        last = datetime(*date_end)

        while current <= last:
            print("Données récupérée pour {}".format(current.strftime("%Y-%m-%d %H:%M")), end="\r")
            query = "SELECT station, lon, lat, {attr} FROM {table} WHERE time = {date}".format(
                attr=", ".join(attrs),
                table=self.table,
                date=(current.year, current.month, current.day, current.hour, current.minute)
            )
            for row in execute_query(query):
                if None in [row.station, row.lon, row.lat] + [getattr(row, attr) for attr in attrs]:
                    continue
                if row.station not in stations:
                    stations[row.station] = {'nb': 0, 'lon': row.lon, 'lat': row.lat,
                                             **{key: 0 for key in attrs}}

                for attr in attrs:
                    stations[row.station][attr] += getattr(row, attr)
                stations[row.station]['nb'] += 1
            current += timedelta(minutes=1)

        # Turn the accumulated sums into means
        for value in stations.values():
            for attr in attrs:
                value[attr] = value[attr] / value['nb']
        return stations

    def historic(self):
        """Objective 1: plot the history of one attribute for one station.

        Saves and shows three figures: the raw series, its seasonal
        decomposition and its autocorrelation.
        """
        self.table = "TABLE_SPACE"
        print("=== Choix 1 : Historique ===")

        # Search station
        stations = []
        print("Choisissez une station parmis celles-ci:")
        query = "SELECT DISTINCT station FROM {}".format(self.table)
        for i, row in enumerate(execute_query(query), 1):
            end = "\n" if i % 3 == 0 else ""  # three stations per line
            print("\t", row.station, end=end)
            stations.append(row.station)
        print()

        if not stations:
            # Nothing can be chosen: asking would loop forever
            print("Aucune station disponible")
            return

        station = ask_q(stations)
        attr = chose_attr()

        # Quote the station as a string literal, doubling embedded single quotes
        station_literal = "'{}'".format(station.replace("'", "''"))
        query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station_literal)
        points = {}
        for row in execute_query(query):
            value = getattr(row, attr)
            if value is None:
                continue
            points[datetime(*row.time)] = value

        if not points:
            # The decomposition below cannot run on an empty series
            print("Aucune donnée disponible pour cette station")
            return

        # Build the series in one go instead of enlarging it row by row
        ts = pd.Series(points, dtype=float)

        plt.figure(figsize=(25, 16))
        axes = plt.subplot()
        axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
        plt.xticks(rotation=90)

        plt.plot(ts, label=attr)
        plt.title("Donnees de {} pour la station : {}".format(attr, station))
        plt.legend()
        path = os.path.join(DIR_OUT, 'objectif_1', 'graph_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        # NOTE(review): recent statsmodels releases appear to name this argument
        # `period` instead of `freq` - confirm against the installed version.
        res = stm.tsa.seasonal_decompose(ts, freq=15, extrapolate_trend='freq')
        res.plot()
        path = os.path.join(DIR_OUT, 'objectif_1', 'decompose_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        stm_graphs.tsaplots.plot_acf(ts, lags=30)
        path = os.path.join(DIR_OUT, 'objectif_1', 'acf_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

    def map(self):
        """Objective 2: draw the value of one attribute at every station for a given minute.

        When no row exists for the requested minute, the times available on
        that day are listed after the (empty) map has been shown.
        """
        self.table = "TABLE_TIME"
        print("=== Choix 2 : Map ===")

        date = ask_d()
        attr = chose_attr()

        the_map = self._draw_base_map()

        date_ok = False  # becomes True as soon as one usable row is found
        query = "SELECT station, lon, lat, {} FROM {} WHERE time={}".format(attr, self.table, date)
        for row in execute_query(query):
            if row.station is None or getattr(row, attr) is None:
                continue
            date_ok = True
            x, y = the_map(row.lon, row.lat)
            value = getattr(row, attr)
            plt.plot(x, y, 'go')
            plt.annotate(round(value, 1), (x, y))

        title = "Map {} du {}".format(attr, datetime(*date).strftime('%Y-%m-%d %H:%M'))
        plt.title(title)
        path = os.path.join(DIR_OUT, 'objectif_2', self._title_to_filename(title))
        plt.savefig(path)
        plt.show()

        # If date is wrong, show some dates for this day
        if not date_ok:
            date_begin = tuple(date[:3]) + (0, 0)  # first minute of the day
            date_end = tuple(date[:3]) + (23, 59)  # last minute of the day
            print("Seules ces heures sont disponibles pour ce jour")

            query = "SELECT DISTINCT time FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING".format(
                self.table,
                date_begin,
                date_end,
            )
            for row in execute_query(query):
                resp = list(row.time)
                print(str(resp[3]).zfill(2) + ":" + str(resp[4]).zfill(2), end=" - ")
            print()  # finish the line started by the list of hours

    def cluster(self):
        """Objective 3: cluster the stations on their mean attributes over a period and map the clusters.

        The centroids are refined iteratively: every station is assigned to
        its nearest centroid, then each centroid becomes the mean of its
        stations (rounded to 2 decimals). All centroids are redrawn at random
        whenever a cluster ends up empty. The loop stops once the centroids no
        longer change.
        """
        self.table = "TABLE_TIME"

        # Ask information from user
        print("=== Choix 3 : CLUSTER ===")
        print("Vous allez devoir choisir une période de temps. On considéra la moyenne des attributs sur cette "
              "période de temps")
        date_begin = date_end = None
        while date_begin is None or date_begin >= date_end:
            print("La date de départ :")
            date_begin = ask_d()
            print("Entrez la date de fin :")
            date_end = ask_d()
        print("Entrez le nombre de cluster voulus")
        nb_cluster = ask_int()
        while nb_cluster < 1:
            # Zero clusters leaves no centroid to assign the stations to
            nb_cluster = ask_int()

        attrs = list(ATTRIBUTS.keys())
        # station: {'nb': 3, 'attr1': 5, 'attr2': 7, ...,  'lon': 3.27, 'lat': 12}
        stations = self._station_means(date_begin, date_end, attrs)

        print()
        if not stations:
            # Centroids cannot be initialised without any observation
            print("Aucune donnée disponible sur cette période")
            return
        if nb_cluster > len(stations):
            # More clusters than stations always leaves one empty, so the
            # centroids would be redrawn again and again
            nb_cluster = len(stations)
            print("Nombre de clusters réduit à {} (nombre de stations disponibles)".format(nb_cluster))

        # Initialisation mean
        old_centroids = None
        new_centroids = [initialisation_centroid(stations) for _ in range(nb_cluster)]

        print("Clusterisation...")
        while old_centroids != new_centroids:
            old_centroids = new_centroids
            # Per cluster: sum of every attribute and number of stations
            data = [
                {**{attr: 0 for attr in attrs}, 'nb': 0}
                for _ in range(nb_cluster)
            ]
            # could be parallelize
            for value_station in stations.values():
                i = self._nearest_centroid(old_centroids, value_station, attrs)
                for attr in attrs:
                    data[i][attr] += value_station[attr]
                data[i]['nb'] += 1
            # end calc parallelize
            if any(elt['nb'] == 0 for elt in data):
                # cluster empty do it again
                new_centroids = [initialisation_centroid(stations) for _ in range(nb_cluster)]
            else:
                # Rounding to 2 decimals lets the equality test above detect convergence
                new_centroids = [
                    {attr: float("{0:.2f}".format(elt[attr] / elt['nb'])) for attr in attrs}
                    for elt in data
                ]

        # configuration map
        the_map = self._draw_base_map()
        colors = [generate_color(i) for i in range(nb_cluster)]

        # Add all points
        for station, value in stations.items():
            # Analyse the point
            i = self._nearest_centroid(old_centroids, value, attrs)

            # Add the point
            x, y = the_map(value['lon'], value['lat'])
            the_map.plot(x, y, marker=".", color=colors[i])
            plt.annotate("{}".format(station), (x, y), color=colors[i])

        title = "{nb_cluster} clusters du {begin} au {end}".format(
            nb_cluster=nb_cluster,
            begin=datetime(*date_begin).strftime('%Y-%m-%d %H:%M'),
            end=datetime(*date_end).strftime('%Y-%m-%d %H:%M')
        )
        plt.title(title)
        path = os.path.join(DIR_OUT, 'objectif_3', self._title_to_filename(title))
        plt.savefig(path)
        plt.show()
Romain Creuzenet's avatar
Romain Creuzenet committed
376
377
378
379


# Start the interactive menu only when the file is executed as a script
if __name__ == "__main__":
    manager = Manager()
    manager.run()