Commit 274af57f authored by sim-baz's avatar sim-baz

Modify date to timestamp

Adding comments
Modifying import behavior
parent cac80d8c
......@@ -7,78 +7,92 @@ import loading
# Table names and numeric column list re-exported from the loading module
table_name_space = loading.table_name_space
table_name_date = loading.table_name_date
numeric_columns = loading.numeric_columns
# Years bounding the period accepted by plotHistory, which checks its
# arguments against MIN_DATE and MAX_DATE + 1
MIN_DATE = 2001
MAX_DATE = 2010
# NOTE(review): this literal rebinds numeric_columns and therefore overrides
# the value taken from loading a few lines above - presumably a leftover from
# before the list was moved to loading; confirm and remove one of the two.
numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti","mslp","vsby","gust","skyl1","skyl2","skyl3","skyl4","feel","ice_accretion_1hr","ice_accretion_3hr","ice_accretion_6hr","peak_wind_gust","peak_wind_drct","peak_wind_time"]
# Module-level Cassandra session used by getHistory; the connection is opened
# as soon as this module is imported, with the driver's default Cluster()
# settings.
cluster = Cluster()
session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar")
def getHistory(station, indicator):
    """Fetch the timestamped values of one indicator for one station.

    Args:
        station: Station identifier matched against the ``station`` column.
        indicator: Name of the column to read alongside ``datetime``.

    Returns:
        Whatever ``session.execute`` returns for the query: rows whose first
        field is the ``datetime`` column and whose last field is the
        requested indicator.
    """
    # Only the ``datetime`` column is queried: the former ``datee`` column no
    # longer exists in the table schema, so the old query must not be run.
    # Column and table names cannot be bound as parameters and are therefore
    # interpolated; the station value is bound by the driver instead of being
    # spliced into the CQL text.
    query = f"SELECT datetime, {indicator} FROM {table_name_space} where station = %s"
    return session.execute(query, (station,))
def getMeanByDay(table, dateMin, dateMax):
    """Average the measurements of each day inside a range of years.

    Args:
        table: Iterable of rows whose first field is a timestamp (anything
            whose ``str()`` starts with ``yyyy-mm-dd``) and whose last field
            is the measured value, or ``None`` when there is no measurement.
        dateMin: First year to keep (inclusive).
        dateMax: Year at which to stop (exclusive).

    Returns:
        A dict mapping each day, as a ``yyyy-mm-dd`` string, to the mean of
        its measurements. Days appear in the order they are first met in
        ``table``; rows without a value or outside the period are ignored.
    """
    # Running (sum, count) of the measurements, keyed by day
    totals = {}
    for row in table:
        value = row[-1]
        # Rows without a measurement contribute nothing to the mean
        if value is None:
            continue
        stamp = str(row[0])
        # The year is the first four characters of the timestamp text
        if not dateMin <= int(stamp[:4]) < dateMax:
            continue
        # Keep only the date part (yyyy-mm-dd) of the timestamp
        day = stamp[:10]
        total, count = totals.get(day, (0, 0))
        totals[day] = (total + value, count + 1)
    return {day: total / count for day, (total, count) in totals.items()}
def getMeanByMonth(table):
    """Average the measurements of each calendar month, all years together.

    Args:
        table: Iterable of rows whose first field is a timestamp (anything
            whose ``str()`` looks like ``yyyy-mm-...``) and whose last field
            is the measured value, or ``None`` when there is no measurement.

    Returns:
        A dict mapping each month, as a two-character string such as
        ``"01"``, to the mean of its measurements. Months appear in the order
        they are first met in ``table``; rows without a value are ignored.
    """
    # Running (sum, count) of the measurements, keyed by month
    totals = {}
    for row in table:
        value = row[-1]
        # Rows without a measurement contribute nothing to the mean
        if value is None:
            continue
        # Characters 5-6 of the timestamp text hold the month
        month = str(row[0])[5:7]
        total, count = totals.get(month, (0, 0))
        totals[month] = (total + value, count + 1)
    return {month: total / count for month, (total, count) in totals.items()}
def plotHistory(station, indicator, dateMin, dateMax):
# Accept only indicator with numeric values (not factors)
if indicator in numeric_columns:
# Verification to ensure the validity of parameters, dates not equal
if dateMin == dateMax:
print(f"Les dates ne doivent pas être égales")
return
# Verification to ensure the validity of parameters, dates in the right period
if dateMin < MIN_DATE or dateMin > (MAX_DATE + 1) or dateMax < MIN_DATE or dateMax > (MAX_DATE + 1):
print(f"Les dates doivent être comprises entre {MIN_DATE} et {MAX_DATE}")
return
# Get datas from cassandra table
table = getHistory(station, indicator)
table = list(table)
# If no data for the period selected
if not table:
print(f"Aucune donnée pour la station {station} et pour l'indicateur {indicator}")
return
# Treat datas
table_mean = getMeanByDay(table, dateMin, dateMax)
table_mean_by_month = getMeanByMonth(table)
# Duplicate list for each year
# Duplicate list for each year in the period required
liste = []
for i in range(dateMax - dateMin):
for key,value in table_mean_by_month.items():
liste.append([key, value])
# Completing the month to have a date format (yyyy-month-01)
i = dateMin
j = 1
for k in range(len(liste)):
j += 1
liste[k][0] = str(i) + '-' + str(liste[k][0])
......@@ -90,19 +104,26 @@ def plotHistory(station, indicator, dateMin, dateMax):
currentDateTime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_name = str(currentDateTime) + "_" + station + "_" + indicator + ".png"
# Configure graduation of plot
# Need to find a nice step for graduation (no more than 6 values to be understandable)
step_graduation = max((dateMax - dateMin) // 5, 1)
print(step_graduation)
# Convert the graduation to the date format (yyyy-month-01)
graduation = ["20" + "0" * (2 - len(str(i))) + str(i) + "-01-01" for i in range(int(str(dateMin)[2:4]), int(str(dateMax)[2:4]), step_graduation)]
# Add the last value of graduation for the last day of measures
graduation.append("20" + "0" * (2 - len(str(dateMax - 1)[2:4])) + str(dateMax - 1)[2:4] + "-12-31")
print (graduation)
# Plot, with both measures and season mean
fig, ax1 = plt.subplots()
# Measures on axis 1
ax1.plot_date(table_mean.keys(), table_mean.values(), '-', xdate = True)
ax1.xaxis.set_ticks(graduation)
ax2 = ax1.twiny()
# Seasonal mean
ax2.plot([elt[0] for elt in liste], [elt[1] for elt in liste], '-', color = "r")
# Do not show graduation on the top of the plot
ax2.xaxis.set_ticks([])
# Set title and labels
plt.title(f"Evolution de {indicator} pour la station {station}")
plt.xlabel('Date')
plt.ylabel(indicator)
......@@ -112,9 +133,14 @@ def plotHistory(station, indicator, dateMin, dateMax):
bottom=False,
top=True
)
# Save figure
plt.savefig(file_name)
print(f"Le graphique a été enregistré à {file_name}")
else:
print("Les données pour cet indicateur ne sont pas numériques, impossible de tracer un graphique")
plotHistory("EFKI", "tmpf", 2001, 2005)
\ No newline at end of file
# Example run, executed only when this file is launched as a script and not
# when it is imported: plots "tmpf" for station "EFKI" with the years 2001 and
# 2005 as bounds, surrounded by blank lines on the console.
if __name__ == '__main__':
print()
plotHistory("EFKI", "tmpf", 2001, 2005)
print()
\ No newline at end of file
......@@ -6,6 +6,8 @@ FILE_NAME = "asos.txt"
# Names of the two Cassandra tables: table_name_space is created with
# createTableQuery (partition key: station) and table_name_date with
# createTableQueryPartitionningByDate (partition key: datetime).
table_name_space = "station"
table_name_date = "date"
# Names of the columns treated as numeric (declared as decimal in the visible
# parts of the CREATE TABLE statements).
numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti","mslp","vsby","gust","skyl1","skyl2","skyl3","skyl4","feel","ice_accretion_1hr","ice_accretion_3hr","ice_accretion_6hr","peak_wind_gust","peak_wind_drct","peak_wind_time"]
# Country: Finland
# Dates : 2001 to 2010
......@@ -52,8 +54,7 @@ def loadata(filename):
data["station"] = r["station"]
valid = dateparser.match(r["valid"]).groupdict()
data["date"] = valid["year"] + "-" + valid["month"] + "-" + valid["day"]
data["time"] = valid["hour"] + ":" + valid["minute"] + ":00"
data["date_and_time"] = valid["year"] + "-" + valid["month"] + "-" + valid["day"] + " " + valid["hour"] + ":" + valid["minute"] + ":00"
data["lon"] = float(r["lon"])
data["lat"] = float(r["lat"])
......@@ -176,8 +177,7 @@ def loadata(filename):
NAME_COLUMNS = """
station ,
datee ,
timee ,
datetime ,
lon ,
lat ,
tmpf ,
......@@ -218,8 +218,7 @@ def dropTableQuery(table):
def createTableQuery(table):
query = f"""CREATE TABLE {table}(
station varchar,
datee date,
timee time,
datetime timestamp,
lon decimal,
lat decimal,
tmpf decimal,
......@@ -249,15 +248,14 @@ def createTableQuery(table):
peak_wind_drct decimal,
peak_wind_time decimal,
metar varchar,
PRIMARY KEY((station), datee, timee)
PRIMARY KEY((station), datetime)
)"""
return query
def createTableQueryPartitionningByDate(table):
query = f"""CREATE TABLE {table}(
station varchar,
datee date,
timee time,
datetime timestamp,
lon decimal,
lat decimal,
tmpf decimal,
......@@ -287,7 +285,7 @@ def createTableQueryPartitionningByDate(table):
peak_wind_drct decimal,
peak_wind_time decimal,
metar varchar,
PRIMARY KEY((datee), station, timee)
PRIMARY KEY((datetime), station)
)"""
return query
......@@ -299,8 +297,7 @@ def insertQueryData(row, table):
)
VALUES(
'{row["station"]}',
'{row["date"]}',
'{row["time"]}',
'{row["date_and_time"]}',
{row["lon"]},
{row["lat"]},
{row["tmpf"]},
......@@ -335,38 +332,38 @@ def insertQueryData(row, table):
"""
return query
cluster = Cluster()
session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar")
dict = loadata(FILE_NAME)
# # --------------------------------------------------------
# # A faire seulement 1 fois pour charger les données
# session.execute(dropTableQuery(table_name_space))
# print(f"Table {table_name_space} dropped")
# session.execute(createTableQuery(table_name_space))
# print(f"Table {table_name_space} created")
# print(f"Starting inserting datas into table {table_name_space}")
# for d in dict:
# session.execute(insertQueryData(d, table_name_space))
# print(f"Datas inserted into {table_name_space}")
# # --------------------------------------------------------
# # --------------------------------------------------------
# # A faire seulement 1 fois pour charger les données
# session.execute(dropTableQuery(table_name_date))
# print(f"Table {table_name_date} dropped")
# session.execute(createTableQueryPartitionningByDate(table_name_date))
# print(f"Table {table_name_date} created")
# print(f"Starting inserting datas into table {table_name_date}")
# for d in dict:
# session.execute(insertQueryData(d, table_name_date))
# print(f"Datas inserted into {table_name_date}")
# # --------------------------------------------------------
\ No newline at end of file
# Executed only when this file is run as a script, not when it is imported
if __name__ == '__main__':
    cluster = Cluster()
    session = cluster.connect()
    session.set_keyspace("bazinsim_roisinos_metar")

    # Named "rows" rather than "dict" so the builtin is not shadowed
    rows = loadata(FILE_NAME)

    # To be run only once, to load the data: each table is dropped, recreated
    # with its own partitioning scheme, then filled with every parsed row.
    for table, build_create_query in (
        (table_name_space, createTableQuery),
        (table_name_date, createTableQueryPartitionningByDate),
    ):
        session.execute(dropTableQuery(table))
        print(f"Table {table} dropped")

        session.execute(build_create_query(table))
        print(f"Table {table} created")

        print(f"Starting inserting datas into table {table}")
        for row in rows:
            session.execute(insertQueryData(row, table))
        print(f"Datas inserted into {table}")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment