Commit cac80d8c authored by sim-baz

Modify table type (date and time) + plots

parent d5a7c1c1
import matplotlib.pyplot as plt
from cassandra.cluster import Cluster
from datetime import datetime
import numpy as np
import loading
# Table names are defined once in the loading module and reused here.
# table_name_space is the table queried by getHistory.
table_name_space = loading.table_name_space
table_name_date = loading.table_name_date
# Year bounds that plotHistory checks its dateMin/dateMax arguments against.
MIN_DATE = 2001
MAX_DATE = 2010
# NOTE(review): not referenced in the visible part of this file.
table_name_station = "station"
# Indicators that plotHistory accepts; any other indicator is reported as
# non-numeric and is not plotted.
numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti","mslp","vsby","gust","skyl1","skyl2","skyl3","skyl4","feel","ice_accretion_1hr","ice_accretion_3hr","ice_accretion_6hr","peak_wind_gust","peak_wind_drct","peak_wind_time"]
# No arguments are passed, so the driver's default connection settings apply.
cluster = Cluster()
......@@ -10,39 +18,103 @@ session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar")
def getHistory(station, indicator):
    """Fetch the date and one indicator column for every record of a station.

    Args:
        station: station identifier to filter on (e.g. ``"EFKI"``).
        indicator: name of the column to read; it is interpolated into the
            query text, so it must be a trusted column name.

    Returns:
        The result set returned by ``session.execute``; each row holds
        ``datee`` followed by the requested indicator.
    """
    # Only the query on the date-typed table is kept: the earlier query on the
    # year/month/day/hour/minute columns had its result overwritten right away.
    # Column and table names cannot be bound as parameters, so they stay
    # interpolated; the station value is bound by the driver instead of being
    # spliced into the CQL string.
    query = f"SELECT datee, {indicator} FROM {table_name_space} WHERE station = %s"
    return session.execute(query, (station,))
def plotHistory(station, indicator):
def getMeanByDay(table, dateMin, dateMax):
    """Average the last column of the rows, grouped by calendar day.

    Args:
        table: iterable of rows. The first element of a row is the date
            (``str()`` of it must start with the four-digit year) and the
            last element is the measured value, or ``None`` when missing.
        dateMin: first year to keep (inclusive).
        dateMax: year at which to stop (exclusive).

    Returns:
        dict mapping ``str(date)`` to the mean of the non-null values of that
        day, in order of first appearance. Days outside the year range and
        rows without a value are ignored.
    """
    sums = {}
    for row in table:
        value = row[-1]
        if value is None:
            continue
        day = str(row[0])
        if not dateMin <= int(day[:4]) < dateMax:
            continue
        # Running (sum, count) pair per day; the mean is computed at the end.
        total, count = sums.get(day, (0, 0))
        sums[day] = (total + value, count + 1)
    return {day: total / count for day, (total, count) in sums.items()}
def getMeanByMonth(table):
    """Average the last column of the rows, grouped by month of the year.

    Rows from every year are pooled together: the key is only the two-digit
    month taken from characters 5-6 of ``str(row[0])``.

    Args:
        table: iterable of rows. The first element of a row is the date
            (``str()`` of it is expected to look like ``YYYY-MM-DD``) and the
            last element is the measured value, or ``None`` when missing.

    Returns:
        dict mapping the month string (e.g. ``"01"``) to the mean of the
        non-null values seen for that month, in order of first appearance.
    """
    sums = {}
    for row in table:
        value = row[-1]
        if value is None:
            continue
        month = str(row[0])[5:7]
        # Running (sum, count) pair per month; the mean is computed at the end.
        total, count = sums.get(month, (0, 0))
        sums[month] = (total + value, count + 1)
    return {month: total / count for month, (total, count) in sums.items()}
def plotHistory(station, indicator, dateMin, dateMax):
    """Plot the daily means of an indicator for a station and save them as PNG.

    Two curves are drawn: the daily mean for the years ``dateMin`` (inclusive)
    to ``dateMax`` (exclusive), and, on a twin x axis in red, the per-month
    mean over all available data repeated for each plotted year.

    Args:
        station: station identifier passed to ``getHistory``.
        indicator: column to plot; must be listed in ``numeric_columns``.
        dateMin: first year to plot (inclusive).
        dateMax: year at which to stop (exclusive).

    Returns:
        None. Problems (non-numeric indicator, invalid years, no data) are
        reported on standard output and nothing is saved.
    """
    if indicator not in numeric_columns:
        print("Les données pour cet indicateur ne sont pas numériques, impossible de tracer un graphique")
        return

    if dateMin == dateMax:
        print("Les dates ne doivent pas être égales")
        return
    if dateMin > dateMax:
        # A reversed range used to produce an empty plot; reject it instead.
        print("La date de début doit être inférieure à la date de fin")
        return
    # dateMax is exclusive, which is why MAX_DATE + 1 is still accepted.
    if dateMin < MIN_DATE or dateMin > (MAX_DATE + 1) or dateMax < MIN_DATE or dateMax > (MAX_DATE + 1):
        print(f"Les dates doivent être comprises entre {MIN_DATE} et {MAX_DATE}")
        return

    table = list(getHistory(station, indicator))
    if not table:
        print(f"Aucune donnée pour la station {station} et pour l'indicateur {indicator}")
        return

    table_mean = getMeanByDay(table, dateMin, dateMax)
    table_mean_by_month = getMeanByMonth(table)

    # Repeat the monthly means once per plotted year. Labels are built from
    # the (year, month) pair directly, so they stay correct when a month has
    # no data or when the rows do not start in January.
    months = sorted(table_mean_by_month)
    liste = [
        [f"{year}-{month}", table_mean_by_month[month]]
        for year in range(dateMin, dateMax)
        for month in months
    ]

    # Name for file
    currentDateTime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = str(currentDateTime) + "_" + station + "_" + indicator + ".png"

    # At most about five ticks at the start of a year, plus the last day.
    step_graduation = max((dateMax - dateMin) // 5, 1)
    print(step_graduation)
    graduation = [f"{year}-01-01" for year in range(dateMin, dateMax, step_graduation)]
    graduation.append(f"{dateMax - 1}-12-31")
    print(graduation)

    fig, ax1 = plt.subplots()
    ax1.plot_date(table_mean.keys(), table_mean.values(), '-', xdate = True)
    ax1.xaxis.set_ticks(graduation)

    # The monthly curve gets its own x axis, with the ticks hidden.
    ax2 = ax1.twiny()
    ax2.plot([elt[0] for elt in liste], [elt[1] for elt in liste], '-', color = "r")
    ax2.xaxis.set_ticks([])

    plt.title(f"Evolution de {indicator} pour la station {station}")
    plt.xlabel('Date')
    plt.ylabel(indicator)
    plt.tick_params(
        axis='x',
        which='both',
        bottom=False,
        top=True
    )
    plt.savefig(file_name)
    # pyplot keeps figures alive until closed; release it once it is saved.
    plt.close(fig)
    print(f"Le graphique a été enregistré à {file_name}")
plotHistory("EFKI", "tmpf")
\ No newline at end of file
plotHistory("EFKI", "tmpf", 2001, 2005)
\ No newline at end of file
......@@ -52,11 +52,8 @@ def loadata(filename):
data["station"] = r["station"]
valid = dateparser.match(r["valid"]).groupdict()
data["year"] = int(valid["year"])
data["month"] = int(valid["month"])
data["day"] = int(valid["day"])
data["hour"] = int(valid["hour"])
data["minute"] = int(valid["minute"])
data["date"] = valid["year"] + "-" + valid["month"] + "-" + valid["day"]
data["time"] = valid["hour"] + ":" + valid["minute"] + ":00"
data["lon"] = float(r["lon"])
data["lat"] = float(r["lat"])
......@@ -179,11 +176,8 @@ def loadata(filename):
NAME_COLUMNS = """
station ,
year ,
month ,
day ,
hour ,
minute ,
datee ,
timee ,
lon ,
lat ,
tmpf ,
......@@ -224,11 +218,8 @@ def dropTableQuery(table):
def createTableQuery(table):
query = f"""CREATE TABLE {table}(
station varchar,
year int,
month int,
day int,
hour int,
minute int,
datee date,
timee time,
lon decimal,
lat decimal,
tmpf decimal,
......@@ -258,18 +249,15 @@ def createTableQuery(table):
peak_wind_drct decimal,
peak_wind_time decimal,
metar varchar,
PRIMARY KEY((station), year, month, day, hour, minute)
PRIMARY KEY((station), datee, timee)
)"""
return query
def createTableQueryPartitionningByDate(table):
query = f"""CREATE TABLE {table}(
station varchar,
year int,
month int,
day int,
hour int,
minute int,
datee date,
timee time,
lon decimal,
lat decimal,
tmpf decimal,
......@@ -299,7 +287,7 @@ def createTableQueryPartitionningByDate(table):
peak_wind_drct decimal,
peak_wind_time decimal,
metar varchar,
PRIMARY KEY((year, month, day), station, hour, minute)
PRIMARY KEY((datee), station, timee)
)"""
return query
......@@ -311,11 +299,8 @@ def insertQueryData(row, table):
)
VALUES(
'{row["station"]}',
{row["year"]},
{row["month"]},
{row["day"]},
{row["hour"]},
{row["minute"]},
'{row["date"]}',
'{row["time"]}',
{row["lon"]},
{row["lat"]},
{row["tmpf"]},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment