main.py 12.9 KB
Newer Older
Romain Creuzenet's avatar
Romain Creuzenet committed
1
"""File to execute to show results"""
Romain Creuzenet's avatar
Romain Creuzenet committed
2
# Data
3
from parameters import SESSION, DIR_OUT, START, END
Romain Creuzenet's avatar
Romain Creuzenet committed
4
# Basic
{}'s avatar
{} committed
5
import matplotlib.dates as mdates
Romain Creuzenet's avatar
Romain Creuzenet committed
6
import matplotlib.pyplot as plt
Romain Creuzenet's avatar
Romain Creuzenet committed
7
import warnings
8
9
import re
import os
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
10
import random
Romain Creuzenet's avatar
Romain Creuzenet committed
11
# Stats
{}'s avatar
{} committed
12
13
14
import statsmodels.graphics as stm_graphs
import pandas as pd 
import statsmodels.api as stm
Romain Creuzenet's avatar
Romain Creuzenet committed
15
16
17
18
19
import numpy as np
# Graph map
from mpl_toolkits.basemap import Basemap
from pandas.plotting import register_matplotlib_converters
from datetime import datetime
{}'s avatar
{} committed
20

Romain Creuzenet's avatar
Romain Creuzenet committed
21
22
register_matplotlib_converters()
warnings.filterwarnings("ignore")
Romain Creuzenet's avatar
Romain Creuzenet committed
23
24
25
26
27
28
29
30


def execute_query(query):
    for row in SESSION.execute(query):
        yield row


def ask_q(possibilities, text=">>> "):
Romain Creuzenet's avatar
Romain Creuzenet committed
31
    """Demande une question"""
Romain Creuzenet's avatar
Romain Creuzenet committed
32
33
34
35
36
    answer = None
    while answer not in possibilities:
        answer = input(text)
    return answer

{}'s avatar
{} committed
37

Romain Creuzenet's avatar
Romain Creuzenet committed
38
39
40
def ask_d(text=">>> "):
    """Demande une date"""
    print("Entrez une date sous la forme YYYY-MM-DD HH:mm")
41
    print("Comprise entre {} et {}".format(START.strftime('%Y-%m-%d'), END.strftime('%Y-%m-%d')))
Romain Creuzenet's avatar
Romain Creuzenet committed
42
43
    date_parser = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{2}):(?P<minute>\d{2})")
    match = None
{}'s avatar
{} committed
44

Romain Creuzenet's avatar
Romain Creuzenet committed
45
46
    while match is None:
        t = input(text)
{}'s avatar
{} committed
47
        match = date_parser.match(t)
Romain Creuzenet's avatar
Romain Creuzenet committed
48
49
50

    m = match.groupdict()
    result = (int(m['year']), int(m['month']), int(m['day']), int(m['hour']), int(m['minute']))
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
51
52
53
54
55
56
57
58
59

    try:
        date = datetime(*list(result))
        if not START < date < END:
            return ask_d(text)
    except ValueError:
        return ask_d(text)
    else:
        return result
Romain Creuzenet's avatar
Romain Creuzenet committed
60

Romain Creuzenet's avatar
Romain Creuzenet committed
61

62
def chose_attr():
Romain Creuzenet's avatar
Romain Creuzenet committed
63
64
65
66
67
68
69
70
71
72
73
74
    """Permet de demander un attribut dans la table"""
    # Search element
    decision = {
        "tmpf": "La témparature",
        "relh": "L'humidité"
    }
    print("Choisissez un élément parmis les suivant :")
    for code, text in decision.items():
        print("\t-", text, ":", code)
    return ask_q(decision.keys())


Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
75
76
def ask_int(text=">>> "):
    """Permet de demander un entier"""
Romain Creuzenet's avatar
Romain Creuzenet committed
77
78
79
80
81
82
    answer = ""
    while not answer or not answer.isdigit():
        answer = input(text)
    return int(answer)


Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
def generate_color():
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))


def clear_zone(zone):
    """
        Permet de néttoyer une zone
    :param zone: list de tuple x, y
    :return: lats, lons
    """
    xs, ys = [], []

    for x, y in zone:
        xs.append(x)
        ys.append(y)

    x_min = min(xs)
    y_min = min(ys)
    x_max = max(xs)
    y_max = max(ys)

    lons = [x_min, x_max, x_max, x_min, x_min]
    lats = [y_max, y_max, y_min, y_min, y_max]
    return lats, lons


Romain Creuzenet's avatar
Romain Creuzenet committed
109
110
111
class Manager:
    table = None  # table name use by the function

Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
112
113
114
115
116
117
118
    # for map
    # data has a precision of 4 decimals
    x_min = -18.42
    x_max = 10.35
    y_min = 25.281898
    y_max = 48.08

Romain Creuzenet's avatar
Romain Creuzenet committed
119
    def run(self):
120
121
122
123
124
        """Chose objective"""
        # Initialisation
        os.makedirs(DIR_OUT, exist_ok=True)

        # Chose objective
Romain Creuzenet's avatar
Romain Creuzenet committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
        print("Choisissez ce que vous voulez faire")
        print("\t1 - Pour un point donné de l’espace, je veux pouvoir avoir un historique du passé")
        print("\t2 - À un instant donné je veux pouvoir obtenir une carte me représentant n’importe quel indicateur")
        print("\t3 - Pour une période de temps donnée, je veux pouvoir obtenir clusteriser l’espace, et représenter "
              "cette clusterisation")
        decision = {
            "1": "historic",
            "2": "map",
            "3": "cluster"
        }
        answer = ask_q(decision.keys())
        getattr(self, decision[answer])()

    def historic(self):
        self.table = "TABLE_SPACE"
140
        print("=== Choix 1 : Historique ===")
Romain Creuzenet's avatar
Romain Creuzenet committed
141
142

        # Search station
Romain Creuzenet's avatar
Romain Creuzenet committed
143
        stations = []
Romain Creuzenet's avatar
Romain Creuzenet committed
144
        print("Choisissez une station parmis celles-ci:")
Romain Creuzenet's avatar
Romain Creuzenet committed
145
146
147
148
149
150
151
        query = "SELECT DISTINCT station FROM {}".format(self.table)
        for i, row in enumerate(execute_query(query), 1):
            end = "\n" if i % 3 == 0 else ""
            print("\t", row.station, end=end)
            stations.append(row.station)
        print()

Romain Creuzenet's avatar
Romain Creuzenet committed
152
        station = ask_q(stations)
153
        attr = chose_attr()
Romain Creuzenet's avatar
Romain Creuzenet committed
154

{}'s avatar
{} committed
155
        ts = pd.Series()
156
        query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, station.__repr__())
Romain Creuzenet's avatar
Romain Creuzenet committed
157
        for row in execute_query(query):
Romain Creuzenet's avatar
Romain Creuzenet committed
158
159
160
            value = getattr(row, attr)
            if value is None:
                continue
{}'s avatar
{} committed
161
162
            ts.loc[datetime(*list(row.time))] = value

Romain Creuzenet's avatar
Romain Creuzenet committed
163
        plt.figure(figsize=(25, 16))
{}'s avatar
{} committed
164
165
        axes = plt.subplot()
        axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
Romain Creuzenet's avatar
Romain Creuzenet committed
166
167
        plt.xticks(rotation=90)

{}'s avatar
{} committed
168
        plt.plot(ts, label=attr)
169
        plt.title("Donnees de {} pour la station : {}".format(attr, station))
Romain Creuzenet's avatar
Romain Creuzenet committed
170
        plt.legend()
171
172
        path = os.path.join(DIR_OUT, 'graph_{}_{}.png'.format(station, attr))
        plt.savefig(path)
Romain Creuzenet's avatar
Romain Creuzenet committed
173
        plt.show()
{}'s avatar
{} committed
174

Romain Creuzenet's avatar
Romain Creuzenet committed
175
        res = stm.tsa.seasonal_decompose(ts, freq=15, extrapolate_trend='freq')
{}'s avatar
{} committed
176
        res.plot()
177
178
        path = os.path.join(DIR_OUT, 'decompose_{}_{}.png'.format(station, attr))
        plt.savefig(path)
Romain Creuzenet's avatar
Romain Creuzenet committed
179
        plt.show()
{}'s avatar
{} committed
180

Romain Creuzenet's avatar
Romain Creuzenet committed
181
        stm_graphs.tsaplots.plot_acf(ts, lags=30)
182
183
        path = os.path.join(DIR_OUT, 'acf_{}_{}.png'.format(station, attr))
        plt.savefig(path)
Romain Creuzenet's avatar
Romain Creuzenet committed
184
        plt.show()
Romain Creuzenet's avatar
Romain Creuzenet committed
185
186

    def map(self):
{}'s avatar
{} committed
187
        self.table = "TABLE_TIME"
188
        print("=== Choix 2 : Map ===")
{}'s avatar
{} committed
189
190

        date = ask_d()
191
        attr = chose_attr()
{}'s avatar
{} committed
192

Romain Creuzenet's avatar
Romain Creuzenet committed
193
        plt.figure(figsize=(14, 14))
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
194
195
196
197
198
199
200
201
        the_map = Basemap(
            projection='mill',
            llcrnrlat=self.y_min,
            llcrnrlon=self.x_min,
            urcrnrlat=self.y_max,
            urcrnrlon=self.x_max,
            resolution='l'
        )
{}'s avatar
{} committed
202
        # draw coastlines, country boundaries, fill continents.
Romain Creuzenet's avatar
Romain Creuzenet committed
203
204
205
        the_map.drawcoastlines(linewidth=0.25)
        the_map.drawcountries(linewidth=0.25)
        the_map.fillcontinents(color='coral', lake_color='aqua')
{}'s avatar
{} committed
206
        # draw the edge of the map projection region (the projection limb)
Romain Creuzenet's avatar
Romain Creuzenet committed
207
        the_map.drawmapboundary(fill_color='aqua')
{}'s avatar
{} committed
208
        # draw lat/lon grid lines every 30 degrees.
Romain Creuzenet's avatar
Romain Creuzenet committed
209
210
211
        the_map.drawmeridians(np.arange(0, 360, 30))
        the_map.drawparallels(np.arange(-90, 90, 30))

Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
212
        date_ok = False  # The date is ok
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
        query = "SELECT station, lon, lat, {} FROM {} WHERE time={}".format(attr, self.table, date)
        for row in execute_query(query):
            if getattr(row, "station") is None or getattr(row, attr) is None:
                continue
            date_ok = True
            x, y = the_map(getattr(row, "lon"), getattr(row, "lat"))
            value = getattr(row, attr)
            plt.plot(x, y, 'go')
            plt.annotate(round(value, 1), (x, y))

        title = "Map {} du {}".format(attr, datetime(*list(date)).strftime('%Y-%m-%d %H:%M'))
        plt.title(title)
        for elt in ' :-':
            title = title.replace(elt, '_')
        path = os.path.join(DIR_OUT, title.lower() + '.png')
        plt.savefig(path)
Romain Creuzenet's avatar
Romain Creuzenet committed
229
        plt.show()
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

        # If date is wrong, show some dates for this day
        if not date_ok:
            date_begin = list(date)
            date_begin[3] = date_begin[4] = 0  # set hours and minutes at 0
            date_begin = tuple(date_begin)

            date_end = list(date)
            date_end[3] = 23
            date_end[4] = 59
            date_end = tuple(date_end)
            print("Seules ces heures sont disponibles pour ce jour")

            query = "SELECT DISTINCT time FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING".format(
                self.table,
                date_begin,
                date_end,
            )
Romain Creuzenet's avatar
Romain Creuzenet committed
248
            for row in execute_query(query):
249
250
                resp = list(getattr(row, "time"))
                print(str(resp[3]).zfill(2) + ":" + str(resp[4]).zfill(2), end=" - ")
{}'s avatar
{} committed
251

Romain Creuzenet's avatar
Romain Creuzenet committed
252
253
    def cluster(self):
        self.table = "TABLE_TIME"
{}'s avatar
{} committed
254

Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
255
256
257
258
259
260
261
262
263
264
        # Ask information from user
        print("=== Choix 3 : CLUSTER ===")
        print("Vous allez devoir choisir une période de temps. Chaque station aura une pondérence suivant le nombre de"
              " mesures prises durant cette péridoe")
        date_begin = date_end = None
        while date_begin is None or date_begin >= date_end:
            print("La date de départ :")
            date_begin = ask_d()
            print("Entrez la date de fin :")
            date_end = ask_d()
Romain Creuzenet's avatar
Romain Creuzenet committed
265
        print("Entrez le nombre de cluster voulus")
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
266
        nb_cluster = ask_int()
Romain Creuzenet's avatar
Romain Creuzenet committed
267

Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
        # Initialisation
        query = "SELECT lon, lat FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING" \
                "".format(self.table, date_begin, date_end)
        old_centroids = None
        new_centroids = [
            (
                random.randint(int(self.x_min * 10000), int(self.x_max * 10000)) / 10000,  # x with 4 decimals
                random.randint(int(self.y_min * 10000), int(self.y_max * 10000)) / 10000,  # x with 4 decimals
            )
            for _ in range(nb_cluster)
        ]

        while old_centroids != new_centroids:
            old_centroids = new_centroids
            data = [
                {'x': 0, 'y': 0, 'nb': 0}
                for _ in range(nb_cluster)
            ]
            # could be parallelize
            for row in execute_query(query):
                distances = [
                    (x - row.lon) ** 2 + (y - row.lat) ** 2
                    for x, y in old_centroids
                ]
                i = distances.index(min(distances))
                data[i]['x'] += row.lon
                data[i]['y'] += row.lat
                data[i]['nb'] += 1
            # end calc parallelize
            if 0 in [value['nb'] for value in data]:
                # cluster empty do it again
                new_centroids = [
                    (
                        random.randint(int(self.x_min * 10000), int(self.y_min * 10000)) / 10000,  # x with 4 decimals
                        random.randint(int(self.y_min * 10000), int(self.y_max * 10000)) / 10000,  # x with 4 decimals
                    )
                    for _ in range(nb_cluster)
                ]
            else:
                new_centroids = [
                    (
                        float("{0:.4f}".format(elt['x'] / elt['nb'])),
                        float("{0:.4f}".format(elt['y'] / elt['nb'])),
                    )
                    for elt in data
                ]

        # configuration map map
        plt.figure(figsize=(14, 14))
        the_map = Basemap(
            projection='mill',
            llcrnrlat=self.y_min,
            llcrnrlon=self.x_min,
            urcrnrlat=self.y_max,
            urcrnrlon=self.x_max,
            resolution='l'
        )
        # draw coastlines, country boundaries, fill continents.
        the_map.drawcoastlines(linewidth=0.25)
        the_map.drawcountries(linewidth=0.25)
        the_map.fillcontinents(color='coral', lake_color='aqua')
        # draw the edge of the map projection region (the projection limb)
        the_map.drawmapboundary(fill_color='aqua')
        # draw lat/lon grid lines every 30 degrees.
        the_map.drawmeridians(np.arange(0, 360, 30))
        the_map.drawparallels(np.arange(-90, 90, 30))

        colors = [generate_color() for _ in range(nb_cluster)]

        # Add centroids
        for i, (lon, lat) in enumerate(old_centroids):
            x, y = the_map(lon, lat)
            # plt.plot(x, y, 'go')
            plt.annotate("Cluster {}".format(i), (x, y), color=colors[i])
            the_map.plot(x, y, marker='D', color=colors[i])

        # Add all points
        query = "SELECT station, lon, lat FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING" \
                "".format(self.table, date_begin, date_end)
        stations = set()
        zones = [[] for _ in range(nb_cluster)]
Romain Creuzenet's avatar
Romain Creuzenet committed
349
        for row in execute_query(query):
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
350
            if row.station in stations:
Romain Creuzenet's avatar
Romain Creuzenet committed
351
                continue
Romain Creuzenet's avatar
cluster  
Romain Creuzenet committed
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
            else:
                stations.add(row.station)

            # Analyse the point
            distances = [
                (row.lon - x_centroid) ** 2 + (row.lat - y_centroid) ** 2
                for x_centroid, y_centroid in old_centroids
            ]
            i = distances.index(min(distances))
            zones[i].append((row.lon, row.lat))

            # Add the point
            x, y = the_map(row.lon, row.lat)
            the_map.plot(x, y, marker=".", color=colors[i])
            plt.annotate(row.station, (x, y), color=colors[i])

        # add zones
        for i, zone in enumerate(zones):
            # Remote point inside box
            lats, lons = clear_zone(zone)

            x, y = the_map(lons, lats)
            the_map.plot(x, y, marker=None, color=colors[i])

        title = "{} clusters du {} au {}".format(
            nb_cluster,
            datetime(*list(date_begin)).strftime('%Y-%m-%d %H:%M'),
            datetime(*list(date_end)).strftime('%Y-%m-%d %H:%M')
        )
        plt.title(title)
        for elt in ' :-':
            title = title.replace(elt, '_')
        path = os.path.join(DIR_OUT, title.lower() + '.png')
        plt.savefig(path)
        plt.show()
Romain Creuzenet's avatar
Romain Creuzenet committed
387
388
389
390


if __name__ == "__main__":
    Manager().run()