Commit 7e2e0f67 authored by sim-baz's avatar sim-baz

Load data

Complete data file
parent 792d7f18
This diff is collapsed.
......@@ -2,12 +2,16 @@ from cassandra.cluster import Cluster
import csv
import re
FILE_NAME = "asos.txt"
# Country: Finland
# Dates : 2001 to 2010
## data format:
# station: three or four character site identifier
# valid: timestamp of the observation
# lon: longitude of the station
# lat: latitude of the station
# tmpf: Air Temperature in Fahrenheit, typically @ 2 meters
# dwpf: Dew Point Temperature in Fahrenheit, typically @ 2 meters
# relh: Relative Humidity in %
......@@ -45,43 +49,286 @@ def loadata(filename):
data = {}
data["station"] = r["station"]
valid = dateparser.match(r["valid"]).groupdict()
data["date_obs"] = (
int(valid["year"]),
int(valid["month"]),
int(valid["day"]),
)
data["time_obs"] = (
int(valid["hour"]),
int(valid["minute"]),
)
data["year"] = int(valid["year"])
data["month"] = int(valid["month"])
data["day"] = int(valid["day"])
data["hour"] = int(valid["hour"])
data["minute"] = int(valid["minute"])
data["lon"] = float(r["lon"])
data["lat"] = float(r["lat"])
data["tmpf"] = float(r["tmpf"])
data["dwpf"] = float(r["dwpf"])
data["relh"] = float(r["relh"])
data["drct"] = float(r["drct"])
data["sknt"] = float(r["sknt"])
data["p01i"] = float(r["p01i"])
data["alti"] = float(r["alti"])
data["mslp"] = float(r["mslp"])
data["vsby"] = float(r["vsby"])
data["gust"] = float(r["gust"])
data["skyc1"] = float(r["skyc1"])
data["skyc2"] = float(r["skyc2"])
data["skyc3"] = float(r["skyc3"])
data["skyc4"] = float(r["skyc4"])
data["skyl1"] = float(r["skyl1"])
data["skyl2"] = float(r["skyl2"])
data["skyl3"] = float(r["skyl3"])
data["skyl4"] = float(r["skyl4"])
if r["tmpf"] == 'null':
data["tmpf"] = 'null'
else:
data["tmpf"] = float(r["tmpf"])
if r["dwpf"] == 'null':
data["dwpf"] = 'null'
else:
data["dwpf"] = float(r["dwpf"])
if r["relh"] == 'null':
data["relh"] = 'null'
else:
data["relh"] = float(r["relh"])
if r["drct"] == 'null':
data["drct"] = 'null'
else:
data["drct"] = float(r["drct"])
if r["sknt"] == 'null':
data["sknt"] = 'null'
else:
data["sknt"] = float(r["sknt"])
if r["p01i"] == 'null':
data["p01i"] = 'null'
else:
data["p01i"] = float(r["p01i"])
if r["alti"] == 'null':
data["alti"] = 'null'
else:
data["alti"] = float(r["alti"])
if r["mslp"] == 'null':
data["mslp"] = 'null'
else:
data["mslp"] = float(r["mslp"])
if r["vsby"] == 'null':
data["vsby"] = 'null'
else:
data["vsby"] = float(r["vsby"])
if r["gust"] == 'null':
data["gust"] = 'null'
else:
data["gust"] = float(r["gust"])
data["skyc1"] = r["skyc1"]
data["skyc2"] = r["skyc2"]
data["skyc3"] = r["skyc3"]
data["skyc4"] = r["skyc4"]
if r["skyl1"] == 'null':
data["skyl1"] = 'null'
else:
data["skyl1"] = float(r["skyl1"])
if r["skyl2"] == 'null':
data["skyl2"] = 'null'
else:
data["skyl2"] = float(r["skyl2"])
if r["skyl3"] == 'null':
data["skyl3"] = 'null'
else:
data["skyl3"] = float(r["skyl3"])
if r["skyl4"] == 'null':
data["skyl4"] = 'null'
else:
data["skyl4"] = float(r["skyl4"])
data["wxcodes"] = r["wxcodes"]
data["feel"] = float(r["feel"])
data["ice_accretion_1hr"] = float(r["ice_accretion_1hr"])
data["ice_accretion_3hr"] = float(r["ice_accretion_3hr"])
data["ice_accretion_6hr"] = float(r["ice_accretion_6hr"])
data["peak_wind_gust"] = float(r["peak_wind_gust"])
data["peak_wind_drct"] = float(r["peak_wind_drct"])
data["peak_wind_time"] = float(r["peak_wind_time"])
if r["feel"] == 'null':
data["feel"] = 'null'
else:
data["feel"] = float(r["feel"])
if r["ice_accretion_1hr"] == 'null':
data["ice_accretion_1hr"] = 'null'
else:
data["ice_accretion_1hr"] = float(r["ice_accretion_1hr"])
if r["ice_accretion_3hr"] == 'null':
data["ice_accretion_3hr"] = 'null'
else:
data["ice_accretion_3hr"] = float(r["ice_accretion_3hr"])
if r["ice_accretion_6hr"] == 'null':
data["ice_accretion_6hr"] = 'null'
else:
data["ice_accretion_6hr"] = float(r["ice_accretion_6hr"])
if r["peak_wind_gust"] == 'null':
data["peak_wind_gust"] = 'null'
else:
data["peak_wind_gust"] = float(r["peak_wind_gust"])
if r["peak_wind_drct"] == 'null':
data["peak_wind_drct"] = 'null'
else:
data["peak_wind_drct"] = float(r["peak_wind_drct"])
if r["peak_wind_time"] == 'null':
data["peak_wind_time"] = 'null'
else:
data["peak_wind_time"] = float(r["peak_wind_time"])
data["metar"] = r["metar"]
yield data
# Column list interpolated into the INSERT statement built by
# insertQueryData; the order here is the order of the inserted values.
NAME_COLUMNS = "\n" + " ,\n".join((
    "station",
    "year",
    "month",
    "day",
    "hour",
    "minute",
    "lon",
    "lat",
    "tmpf",
    "dwpf",
    "relh",
    "drct",
    "sknt",
    "p01i",
    "alti",
    "mslp",
    "vsby",
    "gust",
    "skyc1",
    "skyc2",
    "skyc3",
    "skyc4",
    "skyl1",
    "skyl2",
    "skyl3",
    "skyl4",
    "wxcodes",
    "feel",
    "ice_accretion_1hr",
    "ice_accretion_3hr",
    "ice_accretion_6hr",
    "peak_wind_gust",
    "peak_wind_drct",
    "peak_wind_time",
    "metar",
)) + "\n"
def dropTableQuery(table):
    """Build the CQL statement that drops ``table`` if it exists.

    The table name is interpolated verbatim into the statement text.
    """
    statement = f"\nDROP TABLE IF EXISTS {table}\n"
    return statement
def createTableQuery(table):
    """Build the CQL ``CREATE TABLE`` statement for the observation table.

    The table name is interpolated verbatim into the statement text.

    The primary key partitions rows by ``station`` and clusters them by
    the observation timestamp parts (year, month, day, hour, minute).
    A key made of ``station`` alone would keep a single row per station,
    because a Cassandra INSERT on an existing primary key overwrites the
    stored row instead of adding a new one.
    """
    columns = (
        ("station", "varchar"),
        ("year", "int"),
        ("month", "int"),
        ("day", "int"),
        ("hour", "int"),
        ("minute", "int"),
        ("lon", "decimal"),
        ("lat", "decimal"),
        ("tmpf", "decimal"),
        ("dwpf", "decimal"),
        ("relh", "decimal"),
        ("drct", "decimal"),
        ("sknt", "decimal"),
        ("p01i", "decimal"),
        ("alti", "decimal"),
        ("mslp", "decimal"),
        ("vsby", "decimal"),
        ("gust", "decimal"),
        ("skyc1", "varchar"),
        ("skyc2", "varchar"),
        ("skyc3", "varchar"),
        ("skyc4", "varchar"),
        ("skyl1", "decimal"),
        ("skyl2", "decimal"),
        ("skyl3", "decimal"),
        ("skyl4", "decimal"),
        ("wxcodes", "varchar"),
        ("feel", "decimal"),
        ("ice_accretion_1hr", "decimal"),
        ("ice_accretion_3hr", "decimal"),
        ("ice_accretion_6hr", "decimal"),
        ("peak_wind_gust", "decimal"),
        ("peak_wind_drct", "decimal"),
        ("peak_wind_time", "decimal"),
        ("metar", "varchar"),
    )
    definitions = ",\n".join(
        f"{name} {cql_type}" for name, cql_type in columns
    )
    return (
        f"CREATE TABLE {table}(\n"
        f"{definitions},\n"
        "PRIMARY KEY((station), year, month, day, hour, minute)\n"
        ")"
    )
# Columns whose values are written as quoted CQL string literals; every
# other column is interpolated unquoted.
_TEXT_COLUMNS = frozenset(
    ("station", "skyc1", "skyc2", "skyc3", "skyc4", "wxcodes", "metar")
)


def _cql_text(value):
    """Render ``value`` as a single-quoted CQL string literal."""
    # Inside a CQL string literal a single quote is escaped by doubling it;
    # without this, an apostrophe in the data would end the literal early.
    return "'" + str(value).replace("'", "''") + "'"


def insertQueryData(row, table):
    """Build the CQL ``INSERT`` statement for one observation.

    Args:
        row: mapping holding one value per column named in NAME_COLUMNS.
        table: name of the target table, interpolated verbatim.

    Returns:
        The statement text. Values follow the column order of
        NAME_COLUMNS, so the column list and the value list cannot get
        out of step. Text columns are quoted and escaped; all other values
        are inserted as-is, so the string ``null`` in a non-text column
        reaches Cassandra as the unquoted keyword ``null``.

    Raises:
        KeyError: if ``row`` lacks a column named in NAME_COLUMNS.
    """
    columns = [name.strip() for name in NAME_COLUMNS.split(",")]
    values = ",\n".join(
        _cql_text(row[name]) if name in _TEXT_COLUMNS else str(row[name])
        for name in columns
    )
    return f"""
INSERT INTO
{table}(
{NAME_COLUMNS}
)
VALUES(
{values}
)
;
"""
# Connect to the Cassandra cluster with the driver defaults and select the
# project keyspace.
cluster = Cluster()
session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar")
# loadata is a generator, so nothing is read until the rows are iterated.
# The name `rows` is used so that the builtin `dict` is not shadowed.
rows = loadata(FILE_NAME)
# # --------------------------------------------------------
# # Run only once, to load the data
# table_name = "data"
# session.execute(dropTableQuery(table_name))
# print(f"Table {table_name} dropped")
# session.execute(createTableQuery(table_name))
# print(f"Table {table_name} created")
# print(f"Starting inserting datas into table {table_name}")
# for row in rows:
#     session.execute(insertQueryData(row, table_name))
# print(f"Datas inserted into {table_name}")
# # --------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment