history.py 7.24 KB
Newer Older
sim-baz's avatar
sim-baz committed
1
2
3
import matplotlib.pyplot as plt
from cassandra.cluster import Cluster
from datetime import datetime
4
import numpy as np
sim-baz's avatar
Fix  
sim-baz committed
5
import math
6
7
8
9
10

import loading

table_name_space = loading.table_name_space
table_name_date = loading.table_name_date
sim-baz's avatar
sim-baz committed
11
numeric_columns = loading.numeric_columns
12

sim-baz's avatar
sim-baz committed
13
14
MIN_DATE = loading.MIN_DATE
MAX_DATE = loading.MAX_DATE
sim-baz's avatar
sim-baz committed
15

sim-baz's avatar
sim-baz committed
16
17
18
19
20
21
22
23
24

def getHistory(station, indicator):
    """Fetch the dated measures of one indicator for one station.

    The query is run on the module-level `session`, which must have been
    created (see the script entry point) before this function is called.

    parameters:
        * station: station id to restrict on
        * indicator: name of the column (indicator) to select
    return: result of the query, one row per measure laid out as
        (year, month, day, <indicator>)
    """
    # Column and table names cannot be bound as query parameters, so they are
    # interpolated: `indicator` must be a trusted column name.
    query = f"SELECT year, month, day, {indicator} FROM {table_name_space} where station = %s"
    # The station value is bound by the driver rather than spliced into the
    # CQL text, so a quote inside it cannot break or alter the statement.
    return session.execute(query, (station,))
sim-baz's avatar
sim-baz committed
28

sim-baz's avatar
sim-baz committed
29
30
31
32
33
34
35
36
37
def getMeanByDay(table, dateMin, dateMax):
    """Compute the mean of the measures for each day of a period.

    parameters:
        * table: iterable of rows laid out as (year, month, day, ..., value);
          the measure is read from the last position of each row
        * dateMin: first year of the period (included)
        * dateMax: year ending the period (excluded)
    return: dictionary mapping "yyyy-mm-dd" strings to the mean of the
        non-null measures found for that day
    """
    # Per-day accumulator: date string -> [sum of measures, number of measures]
    totals = {}
    for row in table:
        year, month, day, value = row[0], row[1], row[2], row[-1]
        # Keep only the measures of the requested period that have a value
        if not dateMin <= year < dateMax or value is None:
            continue
        # Month and day are left-padded with a zero to two characters
        date = f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)}"
        accumulator = totals.setdefault(date, [0, 0])
        accumulator[0] += value
        accumulator[1] += 1

    # Turn each (sum, count) pair into a mean
    return {date: total / count for date, (total, count) in totals.items()}
56

sim-baz's avatar
sim-baz committed
57
58
59
60
61
62
63
def getMeanByMonth(table):
    """Compute the mean of the measures by month over the entire table.

    parameters:
        * table: iterable of rows whose second element is the month and
          whose last element is the measure
    return: dictionary mapping each month to the mean of its non-null
        measures, in the order the months are first met in the table
    """
    # Per-month accumulator: month -> [sum of measures, number of measures]
    totals = {}
    for row in table:
        value = row[-1]
        # Rows without a measure do not take part in the mean
        if value is None:
            continue
        accumulator = totals.setdefault(row[1], [0, 0])
        accumulator[0] += value
        accumulator[1] += 1

    # Turn each (sum, count) pair into a mean
    return {month: total / count for month, (total, count) in totals.items()}
80

sim-baz's avatar
sim-baz committed
81
82
83
84
85
86
87
88
def verifyYearValidity(dateMin, dateMax):
    """Check that a pair of years can be used as a period.

    parameters:
        * dateMin: first year
        * dateMax: last year
    return: True when the two years differ and both lie between MIN_DATE and
        MAX_DATE + 1 (bounds included), False otherwise; a message is printed
        for every rejection
    """
    # The two years must not be equal
    if dateMin == dateMax:
        print("Les dates ne doivent pas être égales")
        return False

    # Both years must fall in the supported range; the accepted upper bound is
    # MAX_DATE + 1 although the message quotes MAX_DATE
    upper_bound = MAX_DATE + 1
    if not (MIN_DATE <= dateMin <= upper_bound and MIN_DATE <= dateMax <= upper_bound):
        print(f"Les dates doivent être comprises entre {MIN_DATE} et {MAX_DATE}")
        return False
    return True
sim-baz's avatar
sim-baz committed
100

sim-baz's avatar
sim-baz committed
101
102
103
104
105
106
107
108
109
def _seasonal_series(table_mean_by_month, dateMin, dateMax):
    """Repeat the monthly means once for every year in [dateMin, dateMax)."""
    # Each label is built from the year it belongs to, so the labels stay
    # unique even when some months have no data (fewer than 12 entries).
    return [
        [f"{year}-{month}", value]
        for year in range(dateMin, dateMax)
        for month, value in table_mean_by_month.items()
    ]


def _year_graduation(dateMin, dateMax):
    """Build the x-axis tick labels (yyyy-mm-dd) for the years in [dateMin, dateMax)."""
    # Need a step keeping the graduation readable (no more than 6 values)
    step_graduation = max(math.ceil((dateMax - dateMin) / 5), 1)
    # The full year is used, so the labels are not tied to a given century
    graduation = [f"{year}-01-01" for year in range(dateMin, dateMax, step_graduation)]

    # So that the end date does not overlap another tick
    if (dateMax - dateMin) % 2 == 0 or step_graduation == 1:
        # Add the last value of graduation for the last day of measures
        graduation.append(f"{dateMax - 1}-12-31")
    return graduation


def plotHistory(station, indicator, dateMin, dateMax):
    """Plot the curves of evolution and seasonality and save them as a PNG file.

    The file is written in the current directory and named
    "<timestamp>_<station>_<indicator>.png". Nothing is plotted (a message is
    printed instead) when the indicator is not numeric, when the years are
    rejected by verifyYearValidity or when no measure is available.

    parameters:
        * station: station to analyse
        * indicator: indicator to analyse evolution
        * dateMin: first year of the period to plot
        * dateMax: last year of the period to plot (included)
    """
    # Accept only indicator with numeric values (not factors)
    if indicator not in numeric_columns:
        print("Les données pour cet indicateur ne sont pas numériques, impossible de tracer un graphique")
        return

    # Internally the period is the half-open range [dateMin, end_year);
    # dateMax itself is kept untouched so messages show the year the caller gave
    end_year = dateMax + 1
    if not verifyYearValidity(dateMin, end_year):
        return

    # Get datas from cassandra table; materialised because it is read twice
    table = list(getHistory(station, indicator))

    # Treat datas (an empty table yields empty dictionaries)
    table_mean = getMeanByDay(table, dateMin, end_year)
    table_mean_by_month = getMeanByMonth(table)
    if not table_mean or not table_mean_by_month:
        print(f"Aucune donnée pour la station {station} et pour l'indicateur {indicator} et pour la période {dateMin} - {dateMax}")
        return

    # Seasonal means duplicated for each year of the period required
    liste = _seasonal_series(table_mean_by_month, dateMin, end_year)

    # Name for file
    currentDateTime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{currentDateTime}_{station}_{indicator}.png"

    # Configure graduation of plot
    graduation = _year_graduation(dateMin, end_year)

    # Plot, with both measures and season mean
    fig, ax1 = plt.subplots()
    try:
        # Measures on axis 1
        ax1.plot_date(table_mean.keys(), table_mean.values(), '-', xdate = True, label = 'Evolution')
        ax1.xaxis.set_ticks(graduation)
        ax2 = ax1.twiny()
        # Seasonal mean
        ax2.plot([elt[0] for elt in liste], [elt[1] for elt in liste], '-', color = "r", label = 'Saison')
        # Do not show graduation on the top of the plot
        ax2.xaxis.set_ticks([])

        # Set title and labels
        plt.title(f"Evolution de {indicator} pour la station {station}")
        ax1.set_xlabel('Date')
        ax1.set_ylabel(indicator)

        # Legend gathering the curves of both axes
        h1, l1 = ax1.get_legend_handles_labels()
        h2, l2 = ax2.get_legend_handles_labels()
        plt.legend(h1+h2, l1+l2, loc='lower right')

        plt.tick_params(
            axis='x',
            which='both',
            bottom=False,
            top=True
        )
        # Save figure
        plt.savefig(file_name)
        print(f"Le graphique a été enregistré à {file_name}")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(fig)
sim-baz's avatar
sim-baz committed
197

sim-baz's avatar
sim-baz committed
198
if __name__ == '__main__':
    # `session` is deliberately assigned at module level: getHistory reads it
    # as a global.
    cluster = Cluster()
    try:
        session = cluster.connect()
        session.set_keyspace("bazinsim_roisinos_metar")

        print()
        plotHistory("EFKI", "tmpf", 2001, 2004)
        print()
    finally:
        # Release the connections held by the driver, even if plotting failed
        cluster.shutdown()