Commit 4102d587 authored by Manon Dindelli

calcul moyenne par mois et trace les courbes + insertion dans la table par location

parent 99b84035
import csv
import re
# Timestamp layout accepted by getData, e.g. "2011-02-03 04:05".
# Compiled once at import time instead of on every call; raw string so the
# "\d" escapes are not treated as (invalid) string escapes.
_DATE_PATTERN = re.compile(
    r"(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)"
)
_DATE_FIELDS = ("year", "month", "day", "hour", "minute")


def getData(container, value, type):
    """Read one field from a row and convert it to the requested type.

    Args:
        container: Mapping holding the raw row (indexed with ``container[value]``).
        value: Key of the field to read.
        type: One of ``"string"``, ``"float"`` or ``"date"``.

    Returns:
        * ``None`` when the raw value is ``"M"`` or ``"T"`` (presumably the
          dataset's "missing" / "trace" markers -- confirm against the data
          source), or when ``type`` is not one of the supported names.
        * ``"string"``: the raw value unchanged.
        * ``"float"``: the value converted with ``float()``; a blank string
          yields ``None`` instead of raising.
        * ``"date"``: a ``(year, month, day, hour, minute)`` tuple of ints, or
          the sentinel string ``"continue"`` when the value does not match
          the expected timestamp layout.

    Raises:
        KeyError: If ``value`` is not a key of ``container``.
        ValueError: If a non-blank value cannot be parsed as a float.
    """
    result = container[value]
    if result in ("M", "T"):
        result = None

    if type == "string":
        return result

    if type == "float":
        if result is None:
            return None
        # A blank cell carries no measurement; treat it like a missing value
        # rather than letting float("") abort the whole load.
        if isinstance(result, str) and not result.strip():
            return None
        return float(result)

    if type == "date":
        if result is None:
            return None
        match = _DATE_PATTERN.match(result)
        if not match:
            # Sentinel understood by the caller: skip this row.
            return "continue"
        return tuple(int(match.group(name)) for name in _DATE_FIELDS)

    return None
def loadataLocation(filename):
    """Yield one cleaned observation dict per usable row of a CSV file.

    The file is read with ``csv.DictReader``. Rows whose ``valid`` column is
    missing or does not parse as a timestamp are skipped.

    Args:
        filename: Path of the CSV file to read.

    Yields:
        dict: ``"valid"`` holds the ``(year, month, day, hour, minute)`` tuple
        returned by ``getData``, ``"station"`` the raw station value, and
        ``"lon"``, ``"lat"``, ``"tmpc"``, ``"dwpc"``, ``"relh"``, ``"sknt"``,
        ``"p01m"``, ``"vsby"``, ``"feel"`` a float or ``None``.
    """
    float_fields = (
        "lon",
        "lat",
        "tmpc",
        "dwpc",
        "relh",
        "sknt",
        "p01m",
        "vsby",
        "feel",
    )
    # newline="" lets the csv module handle line endings itself, as the csv
    # documentation requires for files passed to a reader.
    with open(filename, newline="") as f:
        for r in csv.DictReader(f):
            timestamp = getData(r, "valid", "date")
            # None: value flagged as missing; "continue": unparsable timestamp.
            if timestamp is None or timestamp == "continue":
                continue
            data = {
                "valid": timestamp,
                "station": getData(r, "station", "string"),
            }
            for field in float_fields:
                data[field] = getData(r, field, "float")
            yield data
import json
from cassandra.cluster import Cluster
# Connect to the Cassandra cluster on localhost and open a session on
# 'dbermond_projet' (the argument passed to connect()).
cluster = Cluster(['localhost'])
session = cluster.connect('dbermond_projet')
# Create the table.
# The table is dropped first, so every run of this script starts from an
# empty weatherByLocation table.
query = '''
DROP TABLE IF EXISTS weatherByLocation ;
'''
session.execute(query)
# One row per station and timestamp: station is the partition key and the
# timestamp components (year .. minute) are the clustering columns, as
# declared in the PRIMARY KEY clause below.
query = '''
CREATE TABLE weatherByLocation(
station text,
valid_year varint,
valid_month varint,
valid_day varint,
valid_hour varint,
valid_minute varint,
lon float,
lat float,
tmpc float,
dwpc float,
relh float,
sknt float,
p01m float,
vsby float,
feel float,
PRIMARY KEY ((station), valid_year, valid_month, valid_day, valid_hour, valid_minute)
);
'''
session.execute(query)
# Load the CSV data into the weatherByLocation table
def getWeatherByLocation(csvfilename, session):
    """Insert every usable row of a CSV file into ``weatherByLocation``.

    Rows come from ``loadataLocation(csvfilename)``; each one is written with
    a single INSERT. The statement is prepared once and reused for every row
    instead of rebuilding and re-sending the CQL text on each iteration.

    Args:
        csvfilename: Path of the CSV file to load.
        session: Cassandra driver session, already connected to the keyspace
            that contains ``weatherByLocation``.
    """
    insert = session.prepare(
        """
        INSERT INTO weatherByLocation(
            station,
            valid_year,
            valid_month,
            valid_day,
            valid_hour,
            valid_minute,
            lon,
            lat,
            tmpc,
            dwpc,
            relh,
            sknt,
            p01m,
            vsby,
            feel)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
    )
    for r in loadataLocation(csvfilename):
        year, month, day, hour, minute = r["valid"]
        # Values are bound in the same order as the column list above.
        session.execute(
            insert,
            (
                r["station"],
                year,
                month,
                day,
                hour,
                minute,
                r["lon"],
                r["lat"],
                r["tmpc"],
                r["dwpc"],
                r["relh"],
                r["sknt"],
                r["p01m"],
                r["vsby"],
                r["feel"],
            ),
        )
# Load the data: read the file at this hard-coded path and insert its rows
# through the session opened above.
getWeatherByLocation("/home/dbermond/Data/asos-2004>2013.txt", session)
\ No newline at end of file
import matplotlib.pyplot as plt
from cassandra.cluster import Cluster
# Connect to the Cassandra cluster on localhost and open a session on
# 'dbermond_projet' (the argument passed to connect()).
cluster = Cluster(['localhost'])
session = cluster.connect('dbermond_projet')
# Mapping from a parameter keyword to its position in a result row.
# getMean() uses these values as positional indices into the rows returned by
# the "SELECT *" query on weatherbylocation.
# NOTE(review): presumably 13=tmpc, 6=dwpc, 11=relh, 12=sknt, 10=p01m,
# 14=vsby, 7=feel, which assumes SELECT * returns the key columns first and
# the remaining columns in alphabetical order -- verify against the cluster.
dictParameter = {
"temperature": 13,
"dew point": 6,
"humidity": 11,
"wind speed": 12,
"precipitation": 10,
"visibility": 14,
"feel": 7,
}
# Fetch the rows of one station for a given year and month
def getDataByStationAndDate(station, year, month):
    """Return all ``weatherbylocation`` rows of a station for one month.

    The values are handed to the driver as query parameters instead of being
    spliced into the CQL text with ``%`` formatting, so a station name
    containing a quote can neither break nor alter the statement.

    Args:
        station: Station identifier to match on the ``station`` column.
        year: Year to match on ``valid_year`` (coerced with ``int()``, as the
            former ``%d`` formatting did for numbers).
        month: Month to match on ``valid_month`` (coerced with ``int()``).

    Returns:
        Whatever the module-level ``session.execute`` returns for the query.
    """
    query = '''
    SELECT * FROM weatherbylocation WHERE station = %s AND valid_year = %s AND valid_month = %s ;
    '''
    return session.execute(query, (station, int(year), int(month)))
# Compute the mean of one parameter
def getMean(result, parameter):
    """Return the mean of one parameter over the rows of ``result``.

    Args:
        result: Iterable of rows that support positional indexing.
        parameter: Key of ``dictParameter`` naming the column to average.

    Returns:
        The mean of the non-``None`` values of that column, or ``0`` when
        there is no such value.

    Raises:
        KeyError: If ``parameter`` is not a key of ``dictParameter``.
    """
    column = dictParameter[parameter]
    count = 0
    mean = 0
    for row in result:
        value = row[column]
        if value is not None:
            count += 1
            # Incremental update: no need to keep a running sum of all values.
            mean += (value - mean) / count
    return mean
# Plot the curves
def getHistoryByStation(station, parameter, years=(2011, 2012, 2013),
                        filename="courbeMean.png"):
    """Plot the monthly means of a parameter per year and their average.

    One curve is drawn per year, plus one curve with the per-month average
    over all years. The figure is saved to ``filename`` and then cleared.

    Args:
        station: Station identifier passed to ``getDataByStationAndDate``.
        parameter: Key of ``dictParameter``; also used as the y-axis label.
        years: Years to plot. Defaults to 2011-2013, the range that used to be
            hard-coded.
        filename: Output image path. Defaults to ``"courbeMean.png"``.

    Returns:
        list: Twelve values, the mean of each month over the years for which
        the query succeeded. A month with no successful query is NaN.
    """
    months = list(range(1, 13))
    nan = float("nan")
    totals = [0] * 12
    counts = [0] * 12

    for year in years:
        listMean = []  # monthly means for this year
        for j, month in enumerate(months):
            try:
                result = getDataByStationAndDate(station, year, month)
            except Exception as exc:
                print("Erreur lors de la récupération des données", exc)
                # Without this reset, `result` would be unbound or still hold
                # the previous month's rows.
                result = None
            if result is None:
                # Keep listMean aligned with `months` so the plot call does
                # not fail on mismatched lengths.
                listMean.append(nan)
                continue
            mean = getMean(result, parameter)
            listMean.append(mean)
            totals[j] += mean
            counts[j] += 1
        plt.plot(months, listMean, label=str(year))  # curve for one year

    # Average each month over the years that actually contributed a value.
    meanSeason = [
        total / count if count else nan
        for total, count in zip(totals, counts)
    ]

    plt.ylabel(parameter)
    plt.xlabel("Mois")
    plt.plot(months, meanSeason, label='Moyenne')  # average curve
    plt.legend()
    plt.savefig(filename)  # save the figure
    plt.clf()
    return meanSeason
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment