Commit cac80d8c authored by sim-baz's avatar sim-baz
Browse files

Modify table type (date and time) + plots

parent d5a7c1c1
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from cassandra.cluster import Cluster from cassandra.cluster import Cluster
from datetime import datetime from datetime import datetime
import numpy as np
import loading
table_name_space = loading.table_name_space
table_name_date = loading.table_name_date
MIN_DATE = 2001
MAX_DATE = 2010
table_name_station = "station"
numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti","mslp","vsby","gust","skyl1","skyl2","skyl3","skyl4","feel","ice_accretion_1hr","ice_accretion_3hr","ice_accretion_6hr","peak_wind_gust","peak_wind_drct","peak_wind_time"] numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti","mslp","vsby","gust","skyl1","skyl2","skyl3","skyl4","feel","ice_accretion_1hr","ice_accretion_3hr","ice_accretion_6hr","peak_wind_gust","peak_wind_drct","peak_wind_time"]
cluster = Cluster() cluster = Cluster()
...@@ -10,39 +18,103 @@ session = cluster.connect() ...@@ -10,39 +18,103 @@ session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar") session.set_keyspace("bazinsim_roisinos_metar")
def getHistory(station, indicator):
    """Fetch the date and one indicator column for every row of a station.

    Args:
        station: Station identifier matched against the ``station`` column.
        indicator: Name of the column to read alongside ``datee``.

    Returns:
        Whatever ``session.execute`` returns for the query; each row holds
        ``datee`` first and the requested indicator last.
    """
    # Column and table names cannot be bound as query parameters, so they are
    # still interpolated into the statement.
    # NOTE(review): callers must only pass a trusted column name here.
    query = (
        f"SELECT datee, {indicator} FROM {table_name_space} "
        "where station = %s"
    )
    # The station value is bound by the driver instead of being spliced into
    # the CQL text, so quotes inside it can no longer break or alter the query.
    return session.execute(query, (station,))
def getMeanByDay(table, dateMin, dateMax):
    """Average the last column of each row per day, within a year range.

    Args:
        table: Iterable of indexable rows. ``str(row[0])`` must start with a
            four-digit year; the last element of the row is the value.
        dateMin: First year to keep (inclusive).
        dateMax: Year at which to stop (exclusive).

    Returns:
        A dict mapping ``str(row[0])`` to the mean of the non-``None`` values
        seen for that key. Rows outside the year range are ignored.
    """
    totals = {}
    for row in table:
        value = row[-1]
        # Missing measurements must not count towards the average.
        if value is None:
            continue
        day = str(row[0])
        if not dateMin <= int(day[:4]) < dateMax:
            continue
        running_sum, count = totals.get(day, (0, 0))
        totals[day] = (running_sum + value, count + 1)
    # count is always >= 1 here, so the division is safe.
    return {
        day: running_sum / count
        for day, (running_sum, count) in totals.items()
    }
def getMeanByMonth(table):
    """Average the last column of each row per calendar month, all years mixed.

    Args:
        table: Iterable of indexable rows. Characters 5-6 of ``str(row[0])``
            are used as the month key (the ``MM`` of a ``YYYY-MM-DD`` string);
            the last element of the row is the value.

    Returns:
        A dict mapping the two-character month key to the mean of the
        non-``None`` values seen for that month. Keys are inserted in sorted
        order so that iterating the dict yields months in calendar order,
        whatever order the rows arrived in.
    """
    totals = {}
    for row in table:
        value = row[-1]
        # Missing measurements must not count towards the average.
        if value is None:
            continue
        month = str(row[0])[5:7]
        running_sum, count = totals.get(month, (0, 0))
        totals[month] = (running_sum + value, count + 1)
    # count is always >= 1 here, so the division is safe.
    return {
        month: totals[month][0] / totals[month][1]
        for month in sorted(totals)
    }
def plotHistory(station, indicator, dateMin, dateMax):
if indicator in numeric_columns: if indicator in numeric_columns:
if dateMin == dateMax:
print(f"Les dates ne doivent pas être égales")
return
if dateMin < MIN_DATE or dateMin > (MAX_DATE + 1) or dateMax < MIN_DATE or dateMax > (MAX_DATE + 1):
print(f"Les dates doivent être comprises entre {MIN_DATE} et {MAX_DATE}")
return
table = getHistory(station, indicator) table = getHistory(station, indicator)
table = list(table)
if not table: if not table:
print(f"Aucune donnée pour la station {station} et pour l'indicateur {indicator}") print(f"Aucune donnée pour la station {station} et pour l'indicateur {indicator}")
return return
table_date = {} table_mean = getMeanByDay(table, dateMin, dateMax)
for r in table:
if (r[len(r) - 1] != None):
date = datetime.strptime(str(r[0]) + "-" + str(r[1]) + "-" + str(r[2]), '%Y-%m-%d').date()
if date not in table_date.keys():
table_date[date] = 0,0
table_date[date] = (table_date[date][0] + r[len(r) - 1], table_date[date][1] + 1)
for d in table_date.keys(): table_mean_by_month = getMeanByMonth(table)
table_date[d] = table_date[d][0] / table_date[d][1]
# Duplicate list for each year
liste = []
for i in range(dateMax - dateMin):
for key,value in table_mean_by_month.items():
liste.append([key, value])
i = dateMin
j = 1
for k in range(len(liste)):
j += 1
liste[k][0] = str(i) + '-' + str(liste[k][0])
if j > 12:
i += 1
j = 1
# Name for file
currentDateTime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") currentDateTime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_name = str(currentDateTime) + "_" + station + "_" + indicator + ".png" file_name = str(currentDateTime) + "_" + station + "_" + indicator + ".png"
plt.plot_date(table_date.keys(), table_date.values(), '-', xdate = True) step_graduation = max((dateMax - dateMin) // 5, 1)
print(step_graduation)
graduation = ["20" + "0" * (2 - len(str(i))) + str(i) + "-01-01" for i in range(int(str(dateMin)[2:4]), int(str(dateMax)[2:4]), step_graduation)]
graduation.append("20" + "0" * (2 - len(str(dateMax - 1)[2:4])) + str(dateMax - 1)[2:4] + "-12-31")
print (graduation)
fig, ax1 = plt.subplots()
ax1.plot_date(table_mean.keys(), table_mean.values(), '-', xdate = True)
ax1.xaxis.set_ticks(graduation)
ax2 = ax1.twiny()
ax2.plot([elt[0] for elt in liste], [elt[1] for elt in liste], '-', color = "r")
ax2.xaxis.set_ticks([])
plt.title(f"Evolution de {indicator} pour la station {station}") plt.title(f"Evolution de {indicator} pour la station {station}")
plt.xlabel('Date') plt.xlabel('Date')
plt.ylabel(indicator) plt.ylabel(indicator)
plt.tick_params(
axis='x',
which='both',
bottom=False,
top=True
)
plt.savefig(file_name) plt.savefig(file_name)
print(f"Le graphique a été enregistré à {file_name}")
else: else:
print("Les données pour cet indicateur ne sont pas numériques, impossible de tracer un graphique") print("Les données pour cet indicateur ne sont pas numériques, impossible de tracer un graphique")
plotHistory("EFKI", "tmpf") plotHistory("EFKI", "tmpf", 2001, 2005)
\ No newline at end of file \ No newline at end of file
...@@ -52,11 +52,8 @@ def loadata(filename): ...@@ -52,11 +52,8 @@ def loadata(filename):
data["station"] = r["station"] data["station"] = r["station"]
valid = dateparser.match(r["valid"]).groupdict() valid = dateparser.match(r["valid"]).groupdict()
data["year"] = int(valid["year"]) data["date"] = valid["year"] + "-" + valid["month"] + "-" + valid["day"]
data["month"] = int(valid["month"]) data["time"] = valid["hour"] + ":" + valid["minute"] + ":00"
data["day"] = int(valid["day"])
data["hour"] = int(valid["hour"])
data["minute"] = int(valid["minute"])
data["lon"] = float(r["lon"]) data["lon"] = float(r["lon"])
data["lat"] = float(r["lat"]) data["lat"] = float(r["lat"])
...@@ -179,11 +176,8 @@ def loadata(filename): ...@@ -179,11 +176,8 @@ def loadata(filename):
NAME_COLUMNS = """ NAME_COLUMNS = """
station , station ,
year , datee ,
month , timee ,
day ,
hour ,
minute ,
lon , lon ,
lat , lat ,
tmpf , tmpf ,
...@@ -224,11 +218,8 @@ def dropTableQuery(table): ...@@ -224,11 +218,8 @@ def dropTableQuery(table):
def createTableQuery(table): def createTableQuery(table):
query = f"""CREATE TABLE {table}( query = f"""CREATE TABLE {table}(
station varchar, station varchar,
year int, datee date,
month int, timee time,
day int,
hour int,
minute int,
lon decimal, lon decimal,
lat decimal, lat decimal,
tmpf decimal, tmpf decimal,
...@@ -258,18 +249,15 @@ def createTableQuery(table): ...@@ -258,18 +249,15 @@ def createTableQuery(table):
peak_wind_drct decimal, peak_wind_drct decimal,
peak_wind_time decimal, peak_wind_time decimal,
metar varchar, metar varchar,
PRIMARY KEY((station), year, month, day, hour, minute) PRIMARY KEY((station), datee, timee)
)""" )"""
return query return query
def createTableQueryPartitionningByDate(table): def createTableQueryPartitionningByDate(table):
query = f"""CREATE TABLE {table}( query = f"""CREATE TABLE {table}(
station varchar, station varchar,
year int, datee date,
month int, timee time,
day int,
hour int,
minute int,
lon decimal, lon decimal,
lat decimal, lat decimal,
tmpf decimal, tmpf decimal,
...@@ -299,7 +287,7 @@ def createTableQueryPartitionningByDate(table): ...@@ -299,7 +287,7 @@ def createTableQueryPartitionningByDate(table):
peak_wind_drct decimal, peak_wind_drct decimal,
peak_wind_time decimal, peak_wind_time decimal,
metar varchar, metar varchar,
PRIMARY KEY((year, month, day), station, hour, minute) PRIMARY KEY((datee), station, timee)
)""" )"""
return query return query
...@@ -311,11 +299,8 @@ def insertQueryData(row, table): ...@@ -311,11 +299,8 @@ def insertQueryData(row, table):
) )
VALUES( VALUES(
'{row["station"]}', '{row["station"]}',
{row["year"]}, '{row["date"]}',
{row["month"]}, '{row["time"]}',
{row["day"]},
{row["hour"]},
{row["minute"]},
{row["lon"]}, {row["lon"]},
{row["lat"]}, {row["lat"]},
{row["tmpf"]}, {row["tmpf"]},
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment