main.py 12 KB
Newer Older
Romain Creuzenet's avatar
Romain Creuzenet committed
1
"""File to execute to show results"""
Romain Creuzenet's avatar
Romain Creuzenet committed
2
# Data
3
from parameters import SESSION, DIR_OUT, START, END
Romain Creuzenet's avatar
Romain Creuzenet committed
4
# Basic
{}'s avatar
{} committed
5
import matplotlib.dates as mdates
Romain Creuzenet's avatar
Romain Creuzenet committed
6
import matplotlib.pyplot as plt
Romain Creuzenet's avatar
Romain Creuzenet committed
7
import warnings
8
9
import re
import os
Romain Creuzenet's avatar
cluster    
Romain Creuzenet committed
10
import random
Romain Creuzenet's avatar
Romain Creuzenet committed
11
# Stats
{}'s avatar
{} committed
12
13
14
import statsmodels.graphics as stm_graphs
import pandas as pd 
import statsmodels.api as stm
Romain Creuzenet's avatar
Romain Creuzenet committed
15
16
17
18
19
import numpy as np
# Graph map
from mpl_toolkits.basemap import Basemap
from pandas.plotting import register_matplotlib_converters
from datetime import datetime
{}'s avatar
{} committed
20

Romain Creuzenet's avatar
Romain Creuzenet committed
21
22
# Register pandas' date converters with matplotlib before any plot is drawn.
register_matplotlib_converters()
# NOTE(review): this silences every warning for the whole process, including
# deprecation notices from the plotting/statistics libraries — consider narrowing it.
warnings.filterwarnings("ignore")
Romain Creuzenet's avatar
Romain Creuzenet committed
23
24
25
26
27
28
29
30


def execute_query(query):
    """Run ``query`` through ``SESSION`` and lazily yield each returned row.

    :param query: query string handed unchanged to ``SESSION.execute``.
    :return: generator over the rows produced by ``SESSION.execute``.
    """
    yield from SESSION.execute(query)


def ask_q(possibilities, text=">>> "):
    """Prompt the user until the typed answer is one of ``possibilities``.

    :param possibilities: iterable of accepted answers (list, dict keys view,
        generator, ...). It is materialised once so that one-shot iterables are
        not exhausted by the first membership test.
    :param text: prompt displayed by ``input``.
    :return: the accepted answer. ``None`` is returned without prompting when
        ``None`` itself is one of the possibilities.
    :raises ValueError: if ``possibilities`` is empty, because no answer could
        ever be accepted and the prompt would loop forever.
    """
    choices = tuple(possibilities)
    if not choices:
        raise ValueError("ask_q() needs at least one possible answer")

    answer = None
    while answer not in choices:
        answer = input(text)
    return answer

{}'s avatar
{} committed
37

Romain Creuzenet's avatar
Romain Creuzenet committed
38
39
40
def ask_d(text=">>> "):
    """Prompt the user until a valid date/time inside the allowed range is entered.

    The input must have the exact form ``YYYY-MM-DD HH:mm`` (surrounding
    whitespace is ignored), describe a real calendar date, and lie strictly
    between ``START`` and ``END``.

    :param text: prompt displayed by ``input``.
    :return: ``(year, month, day, hour, minute)`` as a tuple of integers.
    """
    print("Entrez une date sous la forme YYYY-MM-DD HH:mm")
    print("Comprise entre {} et {}".format(START.strftime('%Y-%m-%d'), END.strftime('%Y-%m-%d')))
    date_parser = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{2}):(?P<minute>\d{2})")
    fields = ('year', 'month', 'day', 'hour', 'minute')

    # Loop instead of recursing: repeated bad input must not grow the call stack.
    while True:
        # fullmatch rejects trailing garbage such as "2012-01-01 10:00abc".
        match = date_parser.fullmatch(input(text).strip())
        if match is None:
            continue

        result = tuple(int(match.group(name)) for name in fields)
        try:
            date = datetime(*result)
        except ValueError:
            # Well-formed but impossible date (month 13, day 31 in a short month, ...).
            continue

        if START < date < END:
            return result
Romain Creuzenet's avatar
Romain Creuzenet committed
60

Romain Creuzenet's avatar
Romain Creuzenet committed
61

62
def chose_attr():
    """Ask the user which table attribute to work on.

    :return: the code of the chosen attribute (one of the keys listed below).
    """
    # attribute code -> label shown to the user
    decision = {
        "tmpf": "La témparature (en Fahrenheit)",
        "relh": "L'humidité ( en %)"
    }

    print("Choisissez un élément parmis les suivant :")
    for code in decision:
        print("\t- {} : {}".format(decision[code], code))

    return ask_q(decision.keys())


Romain Creuzenet's avatar
cluster    
Romain Creuzenet committed
75
76
def ask_int(text=">>> "):
    """Prompt the user until a non-negative integer is entered.

    :param text: prompt displayed by ``input``.
    :return: the entered value as an ``int`` (always >= 0).
    """
    answer = ""
    # isdecimal() (unlike isdigit()) only accepts characters that int() can
    # convert, so input such as "²" is asked again instead of raising ValueError.
    # An empty answer is not decimal either, so it is asked again as well.
    while not answer.isdecimal():
        answer = input(text).strip()
    return int(answer)


Romain Creuzenet's avatar
Romain Creuzenet committed
83
84
85
86
87
88
def generate_color(i):
    """Return a matplotlib colour for the index ``i``.

    Indexes that fit in the single-letter palette get the matching letter;
    any larger index gets a random ``#rrggbb`` colour.

    :param i: integer index of the colour to produce.
    :return: colour string understood by matplotlib.
    """
    palette = 'bgrcmykw'
    if i >= len(palette):
        # Palette exhausted: fall back to a random 24-bit colour.
        return "#{:06x}".format(random.randint(0, 0xFFFFFF))
    return palette[i]
Romain Creuzenet's avatar
cluster    
Romain Creuzenet committed
89
90


Romain Creuzenet's avatar
Romain Creuzenet committed
91
92
93
class Manager:
    """Interactive console front-end: asks what to display, queries the data and plots it.

    :meth:`run` offers three actions: the history of one station
    (:meth:`historic`), a map of one attribute at a given instant (:meth:`map`)
    and a clustering of the stations over a period (:meth:`cluster`).
    Every figure is saved under ``DIR_OUT`` and then displayed.
    """

    table = None  # table name used by the action currently running

    # for map: corners of the drawn area (x values are longitudes, y values latitudes)
    # data has a precision of 4 decimals
    x_min = -18.42
    x_max = 10.35
    y_min = 25.281898
    y_max = 48.08

    def run(self):
        """Show the main menu and run the action chosen by the user."""
        # Initialisation
        os.makedirs(DIR_OUT, exist_ok=True)

        # Chose objective
        print("Choisissez ce que vous voulez faire")
        print("\t1 - Pour un point donné de l’espace, je veux pouvoir avoir un historique du passé")
        print("\t2 - À un instant donné je veux pouvoir obtenir une carte me représentant n’importe quel indicateur")
        print("\t3 - Pour une période de temps donnée, je veux pouvoir obtenir clusteriser l’espace, et représenter "
              "cette clusterisation")
        # menu answer -> name of the method to call
        decision = {
            "1": "historic",
            "2": "map",
            "3": "cluster"
        }
        answer = ask_q(decision.keys())
        getattr(self, decision[answer])()

    # ------------------------------------------------------------------ helpers

    def _draw_base_map(self):
        """Open a new figure, draw the background map on it and return the Basemap."""
        plt.figure(figsize=(14, 14))
        the_map = Basemap(
            projection='mill',
            llcrnrlat=self.y_min,
            llcrnrlon=self.x_min,
            urcrnrlat=self.y_max,
            urcrnrlon=self.x_max,
            resolution='l'
        )
        # draw coastlines, country boundaries, fill continents.
        the_map.drawcoastlines(linewidth=0.25)
        the_map.drawcountries(linewidth=0.25)
        the_map.fillcontinents(color='coral', lake_color='aqua')
        # draw the edge of the map projection region (the projection limb)
        the_map.drawmapboundary(fill_color='aqua')
        # draw lat/lon grid lines every 30 degrees.
        the_map.drawmeridians(np.arange(0, 360, 30))
        the_map.drawparallels(np.arange(-90, 90, 30))
        return the_map

    @staticmethod
    def _path_from_title(title):
        """Turn a figure title into the path of the PNG file to write under ``DIR_OUT``."""
        for elt in ' :-':
            title = title.replace(elt, '_')
        return os.path.join(DIR_OUT, title.lower() + '.png')

    @staticmethod
    def _nearest_centroid(value, centroids):
        """Return the index of the centroid closest to ``value`` (first one on a tie)."""
        return min(range(len(centroids)), key=lambda i: (value - centroids[i]) ** 2)

    @staticmethod
    def _kmeans_1d(means, nb_cluster):
        """Cluster one-dimensional values with k-means and return the centroids.

        :param means: list of numbers to cluster.
        :param nb_cluster: requested number of clusters. It is reduced to the
            number of distinct values when larger, since the extra clusters
            could never receive a point.
        :return: list of centroids rounded to 2 decimals; empty when ``means``
            is empty or ``nb_cluster`` is lower than 1.
        """
        if not means or nb_cluster < 1:
            return []

        distinct = sorted(set(means))
        nb_cluster = min(nb_cluster, len(distinct))

        def pick_centroids():
            # Seeding with distinct data values guarantees that every cluster
            # owns at least one point at the first assignment. Seeding with
            # random integers between int(min) and int(max) cannot separate
            # values sharing the same integer part and would retry forever.
            return random.sample(distinct, nb_cluster)

        old_centroids = None
        new_centroids = pick_centroids()
        while old_centroids != new_centroids:
            old_centroids = new_centroids
            data = [{'sum': 0, 'nb': 0} for _ in range(nb_cluster)]
            # could be parallelize
            for m in means:
                i = Manager._nearest_centroid(m, old_centroids)
                data[i]['sum'] += m
                data[i]['nb'] += 1
            # end calc parallelize
            if any(elt['nb'] == 0 for elt in data):
                # cluster empty do it again
                new_centroids = pick_centroids()
            else:
                # Rounding keeps the convergence test stable against float noise.
                new_centroids = [
                    float("{0:.2f}".format(elt['sum'] / elt['nb']))
                    for elt in data
                ]
        return new_centroids

    # ------------------------------------------------------------------ actions

    def historic(self):
        """Plot the history of one attribute for a station chosen by the user.

        Three figures are saved and shown: the raw series, its seasonal
        decomposition and its autocorrelation.
        """
        self.table = "TABLE_SPACE"
        print("=== Choix 1 : Historique ===")

        # Search station
        stations = []
        print("Choisissez une station parmis celles-ci:")
        query = "SELECT DISTINCT station FROM {}".format(self.table)
        for i, row in enumerate(execute_query(query), 1):
            end = "\n" if i % 3 == 0 else ""
            print("\t", row.station, end=end)
            stations.append(row.station)
        print()
        if not stations:
            # Nothing can be chosen: asking would never end.
            print("Aucune station disponible")
            return

        station = ask_q(stations)
        attr = chose_attr()

        # repr() quotes the station name inside the query; the name comes from
        # the list read from the database just above, not from free text.
        query = "SELECT time, {} FROM {} WHERE station={}".format(attr, self.table, repr(station))
        # Collect the points first and build the Series once: growing a Series
        # row by row re-allocates it at every insertion.
        points = {}
        for row in execute_query(query):
            value = getattr(row, attr)
            if value is None:
                continue
            points[datetime(*row.time)] = value
        if not points:
            print("Aucune donnée disponible pour cette station")
            return
        ts = pd.Series(points)

        plt.figure(figsize=(25, 16))
        axes = plt.subplot()
        axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
        plt.xticks(rotation=90)

        plt.plot(ts, label=attr)
        plt.title("Donnees de {} pour la station : {}".format(attr, station))
        plt.legend()
        path = os.path.join(DIR_OUT, 'graph_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        # NOTE(review): recent statsmodels releases name this argument `period`
        # instead of `freq` — confirm against the installed version.
        res = stm.tsa.seasonal_decompose(ts, freq=15, extrapolate_trend='freq')
        res.plot()
        path = os.path.join(DIR_OUT, 'decompose_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

        stm_graphs.tsaplots.plot_acf(ts, lags=30)
        path = os.path.join(DIR_OUT, 'acf_{}_{}.png'.format(station, attr))
        plt.savefig(path)
        plt.show()

    def map(self):
        """Draw every station's value of one attribute on a map for a given instant.

        When no row exists for the requested instant, the times available on
        the same day are printed after the (empty) map has been shown.
        """
        self.table = "TABLE_TIME"
        print("=== Choix 2 : Map ===")

        date = ask_d()
        attr = chose_attr()

        the_map = self._draw_base_map()

        date_ok = False  # The date is ok
        query = "SELECT station, lon, lat, {} FROM {} WHERE time={}".format(attr, self.table, date)
        for row in execute_query(query):
            value = getattr(row, attr)
            if row.station is None or value is None:
                continue
            date_ok = True
            x, y = the_map(row.lon, row.lat)
            plt.plot(x, y, 'go')
            plt.annotate(round(value, 1), (x, y))

        title = "Map {} du {}".format(attr, datetime(*date).strftime('%Y-%m-%d %H:%M'))
        plt.title(title)
        plt.savefig(self._path_from_title(title))
        plt.show()

        # If date is wrong, show some dates for this day
        if not date_ok:
            date_begin = date[:3] + (0, 0)  # set hours and minutes at 0
            date_end = date[:3] + (23, 59)
            print("Seules ces heures sont disponibles pour ce jour")

            query = "SELECT DISTINCT time FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING".format(
                self.table,
                date_begin,
                date_end,
            )
            for row in execute_query(query):
                resp = list(row.time)
                print(str(resp[3]).zfill(2) + ":" + str(resp[4]).zfill(2), end=" - ")

    def cluster(self):
        """Cluster the stations on the mean of one attribute over a period and map the result.

        The user chooses the attribute, the period and the number of clusters.
        Each station is drawn in the colour of its cluster and annotated with
        its mean value.
        """
        self.table = "TABLE_TIME"

        # Ask information from user
        print("=== Choix 3 : CLUSTER ===")
        attr = chose_attr()
        print("Vous allez devoir choisir une période de temps. On considéra la moyenne de l'attribut sur cette "
              "période de temps")
        date_begin = date_end = None
        while date_begin is None or date_begin >= date_end:
            print("La date de départ :")
            date_begin = ask_d()
            print("Entrez la date de fin :")
            date_end = ask_d()
        print("Entrez le nombre de cluster voulus")
        nb_cluster = 0
        while nb_cluster < 1:  # zero clusters cannot hold any station
            nb_cluster = ask_int()

        # Calc of mean
        query = "SELECT station, lon, lat, {attr} FROM {table} WHERE time >= {begin} AND time <= {end} ALLOW FILTERING" \
                "".format(
                    attr=attr,
                    table=self.table,
                    begin=date_begin,
                    end=date_end
                )
        stations = {}  # station: {'nb': 3, 'sum': 5, 'lon': 3.27, 'lat': 12}
        for row in execute_query(query):
            value = getattr(row, attr)
            if None in (row.station, row.lon, row.lat, value):
                continue
            if row.station in stations:
                stations[row.station]['sum'] += value
                stations[row.station]['nb'] += 1
            else:
                stations[row.station] = {'nb': 1, 'sum': value, 'lon': row.lon, 'lat': row.lat}
        if not stations:
            print("Aucune donnée disponible pour cette période")
            return
        for value in stations.values():
            value['mean'] = value['sum'] / value['nb']
        means = [elt['mean'] for elt in stations.values()]

        centroids = self._kmeans_1d(means, nb_cluster)
        # May be lower than requested when there are fewer distinct means.
        nb_cluster = len(centroids)

        # configuration map
        the_map = self._draw_base_map()

        colors = [generate_color(i) for i in range(nb_cluster)]

        # Add all points
        for station, value in stations.items():
            # Analyse the point
            i = self._nearest_centroid(value['mean'], centroids)

            # Add the point
            x, y = the_map(value['lon'], value['lat'])
            the_map.plot(x, y, marker=".", color=colors[i])
            plt.annotate("{} : {:.2f}".format(station, value['mean']), (x, y), color=colors[i])

        title = "{nb_cluster} clusters de {attr} du {begin} au {end}".format(
            nb_cluster=nb_cluster,
            attr=attr,
            begin=datetime(*date_begin).strftime('%Y-%m-%d %H:%M'),
            end=datetime(*date_end).strftime('%Y-%m-%d %H:%M')
        )
        plt.title(title)
        plt.savefig(self._path_from_title(title))
        plt.show()
Romain Creuzenet's avatar
Romain Creuzenet committed
357
358
359
360


# Script entry point: start the interactive menu only when executed directly.
if __name__ == "__main__":
    Manager().run()