Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
N
nf26-metar
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Simon Bazin
nf26-metar
Commits
7e2e0f67
Commit
7e2e0f67
authored
Jun 17, 2019
by
sim-baz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Load data
Complete data file
parent
792d7f18
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
368236 additions
and
41 deletions
+368236
-41
asos.txt
asos.txt
+367955
-7
loading.py
loading.py
+281
-34
No files found.
asos.txt
View file @
7e2e0f67
This diff is collapsed.
Click to expand it.
loading.py
View file @
7e2e0f67
...
...
@@ -2,12 +2,16 @@ from cassandra.cluster import Cluster
import
csv
import
re
# Name of the input file handed to loadata() at module level.
FILE_NAME = "asos.txt"
# Country: Finland
# Dates : 2001 to 2010
## data format:
# station: three or four character site identifier
# valid: timestamp of the observation
# lon: longitude of the station
# lat: latitude of the station
# tmpf: Air Temperature in Fahrenheit, typically @ 2 meters
# dwpf: Dew Point Temperature in Fahrenheit, typically @ 2 meters
# relh: Relative Humidity in %
...
...
@@ -45,43 +49,286 @@ def loadata(filename):
data
=
{}
data
[
"station"
]
=
r
[
"station"
]
valid
=
dateparser
.
match
(
r
[
"valid"
]).
groupdict
()
data
[
"date_obs"
]
=
(
int
(
valid
[
"year"
]),
int
(
valid
[
"month"
]),
int
(
valid
[
"day"
]),
)
data
[
"time_obs"
]
=
(
int
(
valid
[
"hour"
]),
int
(
valid
[
"minute"
]),
)
data
[
"year"
]
=
int
(
valid
[
"year"
])
data
[
"month"
]
=
int
(
valid
[
"month"
])
data
[
"day"
]
=
int
(
valid
[
"day"
])
data
[
"hour"
]
=
int
(
valid
[
"hour"
])
data
[
"minute"
]
=
int
(
valid
[
"minute"
])
data
[
"lon"
]
=
float
(
r
[
"lon"
])
data
[
"lat"
]
=
float
(
r
[
"lat"
])
data
[
"tmpf"
]
=
float
(
r
[
"tmpf"
])
data
[
"dwpf"
]
=
float
(
r
[
"dwpf"
])
data
[
"relh"
]
=
float
(
r
[
"relh"
])
data
[
"drct"
]
=
float
(
r
[
"drct"
])
data
[
"sknt"
]
=
float
(
r
[
"sknt"
])
data
[
"p01i"
]
=
float
(
r
[
"p01i"
])
data
[
"alti"
]
=
float
(
r
[
"alti"
])
data
[
"mslp"
]
=
float
(
r
[
"mslp"
])
data
[
"vsby"
]
=
float
(
r
[
"vsby"
])
data
[
"gust"
]
=
float
(
r
[
"gust"
])
data
[
"skyc1"
]
=
float
(
r
[
"skyc1"
])
data
[
"skyc2"
]
=
float
(
r
[
"skyc2"
])
data
[
"skyc3"
]
=
float
(
r
[
"skyc3"
])
data
[
"skyc4"
]
=
float
(
r
[
"skyc4"
])
data
[
"skyl1"
]
=
float
(
r
[
"skyl1"
])
data
[
"skyl2"
]
=
float
(
r
[
"skyl2"
])
data
[
"skyl3"
]
=
float
(
r
[
"skyl3"
])
data
[
"skyl4"
]
=
float
(
r
[
"skyl4"
])
if
r
[
"tmpf"
]
==
'null'
:
data
[
"tmpf"
]
=
'null'
else
:
data
[
"tmpf"
]
=
float
(
r
[
"tmpf"
])
if
r
[
"dwpf"
]
==
'null'
:
data
[
"dwpf"
]
=
'null'
else
:
data
[
"dwpf"
]
=
float
(
r
[
"dwpf"
])
if
r
[
"relh"
]
==
'null'
:
data
[
"relh"
]
=
'null'
else
:
data
[
"relh"
]
=
float
(
r
[
"relh"
])
if
r
[
"drct"
]
==
'null'
:
data
[
"drct"
]
=
'null'
else
:
data
[
"drct"
]
=
float
(
r
[
"drct"
])
if
r
[
"sknt"
]
==
'null'
:
data
[
"sknt"
]
=
'null'
else
:
data
[
"sknt"
]
=
float
(
r
[
"sknt"
])
if
r
[
"p01i"
]
==
'null'
:
data
[
"p01i"
]
=
'null'
else
:
data
[
"p01i"
]
=
float
(
r
[
"p01i"
])
if
r
[
"alti"
]
==
'null'
:
data
[
"alti"
]
=
'null'
else
:
data
[
"alti"
]
=
float
(
r
[
"alti"
])
if
r
[
"mslp"
]
==
'null'
:
data
[
"mslp"
]
=
'null'
else
:
data
[
"mslp"
]
=
float
(
r
[
"mslp"
])
if
r
[
"vsby"
]
==
'null'
:
data
[
"vsby"
]
=
'null'
else
:
data
[
"vsby"
]
=
float
(
r
[
"vsby"
])
if
r
[
"gust"
]
==
'null'
:
data
[
"gust"
]
=
'null'
else
:
data
[
"gust"
]
=
float
(
r
[
"gust"
])
data
[
"skyc1"
]
=
r
[
"skyc1"
]
data
[
"skyc2"
]
=
r
[
"skyc2"
]
data
[
"skyc3"
]
=
r
[
"skyc3"
]
data
[
"skyc4"
]
=
r
[
"skyc4"
]
if
r
[
"skyl1"
]
==
'null'
:
data
[
"skyl1"
]
=
'null'
else
:
data
[
"skyl1"
]
=
float
(
r
[
"skyl1"
])
if
r
[
"skyl2"
]
==
'null'
:
data
[
"skyl2"
]
=
'null'
else
:
data
[
"skyl2"
]
=
float
(
r
[
"skyl2"
])
if
r
[
"skyl3"
]
==
'null'
:
data
[
"skyl3"
]
=
'null'
else
:
data
[
"skyl3"
]
=
float
(
r
[
"skyl3"
])
if
r
[
"skyl4"
]
==
'null'
:
data
[
"skyl4"
]
=
'null'
else
:
data
[
"skyl4"
]
=
float
(
r
[
"skyl4"
])
data
[
"wxcodes"
]
=
r
[
"wxcodes"
]
data
[
"feel"
]
=
float
(
r
[
"feel"
])
data
[
"ice_accretion_1hr"
]
=
float
(
r
[
"ice_accretion_1hr"
])
data
[
"ice_accretion_3hr"
]
=
float
(
r
[
"ice_accretion_3hr"
])
data
[
"ice_accretion_6hr"
]
=
float
(
r
[
"ice_accretion_6hr"
])
data
[
"peak_wind_gust"
]
=
float
(
r
[
"peak_wind_gust"
])
data
[
"peak_wind_drct"
]
=
float
(
r
[
"peak_wind_drct"
])
data
[
"peak_wind_time"
]
=
float
(
r
[
"peak_wind_time"
])
if
r
[
"feel"
]
==
'null'
:
data
[
"feel"
]
=
'null'
else
:
data
[
"feel"
]
=
float
(
r
[
"feel"
])
if
r
[
"ice_accretion_1hr"
]
==
'null'
:
data
[
"ice_accretion_1hr"
]
=
'null'
else
:
data
[
"ice_accretion_1hr"
]
=
float
(
r
[
"ice_accretion_1hr"
])
if
r
[
"ice_accretion_3hr"
]
==
'null'
:
data
[
"ice_accretion_3hr"
]
=
'null'
else
:
data
[
"ice_accretion_3hr"
]
=
float
(
r
[
"ice_accretion_3hr"
])
if
r
[
"ice_accretion_6hr"
]
==
'null'
:
data
[
"ice_accretion_6hr"
]
=
'null'
else
:
data
[
"ice_accretion_6hr"
]
=
float
(
r
[
"ice_accretion_6hr"
])
if
r
[
"peak_wind_gust"
]
==
'null'
:
data
[
"peak_wind_gust"
]
=
'null'
else
:
data
[
"peak_wind_gust"
]
=
float
(
r
[
"peak_wind_gust"
])
if
r
[
"peak_wind_drct"
]
==
'null'
:
data
[
"peak_wind_drct"
]
=
'null'
else
:
data
[
"peak_wind_drct"
]
=
float
(
r
[
"peak_wind_drct"
])
if
r
[
"peak_wind_time"
]
==
'null'
:
data
[
"peak_wind_time"
]
=
'null'
else
:
data
[
"peak_wind_time"
]
=
float
(
r
[
"peak_wind_time"
])
data
[
"metar"
]
=
r
[
"metar"
]
yield
data
# Comma-separated column list interpolated into the INSERT statement built by
# insertQueryData(). The names appear in the same order as the column
# definitions in createTableQuery() and as the VALUES emitted by
# insertQueryData().
NAME_COLUMNS = """
station ,
year ,
month ,
day ,
hour ,
minute ,
lon ,
lat ,
tmpf ,
dwpf ,
relh ,
drct ,
sknt ,
p01i ,
alti ,
mslp ,
vsby ,
gust ,
skyc1 ,
skyc2 ,
skyc3 ,
skyc4 ,
skyl1 ,
skyl2 ,
skyl3 ,
skyl4 ,
wxcodes ,
feel ,
ice_accretion_1hr ,
ice_accretion_3hr ,
ice_accretion_6hr ,
peak_wind_gust ,
peak_wind_drct ,
peak_wind_time ,
metar
"""
def dropTableQuery(table):
    """Build the CQL statement that drops ``table`` when it exists.

    Args:
        table: name of the table; interpolated verbatim into the statement,
            so it must be a trusted identifier.

    Returns:
        The ``DROP TABLE IF EXISTS`` statement as a string.
    """
    query = f"""
        DROP TABLE IF EXISTS {table}
    """
    return query
def createTableQuery(table):
    """Build the CQL statement that creates the observation table.

    The primary key is ``((station), year, month, day, hour, minute)``:
    ``station`` is the partition key and the observation timestamp parts are
    clustering columns. A key made of ``station`` alone would give every
    observation of a station the same primary key, so each INSERT would
    overwrite the previous one and only one row per station would remain.

    Args:
        table: name of the table to create; interpolated verbatim into the
            statement, so it must be a trusted identifier.

    Returns:
        The ``CREATE TABLE`` statement as a string.
    """
    query = f"""CREATE TABLE {table} (
        station varchar,
        year int,
        month int,
        day int,
        hour int,
        minute int,
        lon decimal,
        lat decimal,
        tmpf decimal,
        dwpf decimal,
        relh decimal,
        drct decimal,
        sknt decimal,
        p01i decimal,
        alti decimal,
        mslp decimal,
        vsby decimal,
        gust decimal,
        skyc1 varchar,
        skyc2 varchar,
        skyc3 varchar,
        skyc4 varchar,
        skyl1 decimal,
        skyl2 decimal,
        skyl3 decimal,
        skyl4 decimal,
        wxcodes varchar,
        feel decimal,
        ice_accretion_1hr decimal,
        ice_accretion_3hr decimal,
        ice_accretion_6hr decimal,
        peak_wind_gust decimal,
        peak_wind_drct decimal,
        peak_wind_time decimal,
        metar varchar,
        PRIMARY KEY ((station), year, month, day, hour, minute)
    )"""
    return query
def insertQueryData(row, table):
    """Build the CQL INSERT statement for one parsed observation.

    Values are emitted in the order of the names listed in NAME_COLUMNS, so
    the column list and the value list cannot drift apart.

    Args:
        row: mapping holding one value for every column named in
            NAME_COLUMNS.
        table: name of the target table; interpolated verbatim into the
            statement, so it must be a trusted identifier.

    Returns:
        The ``INSERT INTO`` statement as a string.

    Raises:
        KeyError: if ``row`` lacks one of the columns named in NAME_COLUMNS.
    """
    # Columns declared as varchar in createTableQuery(); their values are
    # written as quoted CQL string literals.
    text_columns = {
        "station",
        "skyc1",
        "skyc2",
        "skyc3",
        "skyc4",
        "wxcodes",
        "metar",
    }

    def literal(column):
        """Render row[column] as a CQL literal."""
        value = row[column]
        if column in text_columns:
            # CQL escapes a single quote inside a string literal by doubling
            # it; without this a value containing ' breaks the statement.
            escaped = str(value).replace("'", "''")
            return f"'{escaped}'"
        # Other values are written unquoted, so the 'null' placeholder that
        # loadata() stores for missing measurements becomes a CQL null.
        return f"{value}"

    columns = [name.strip() for name in NAME_COLUMNS.split(",")]
    values = ",\n        ".join(literal(column) for column in columns)

    # NOTE(review): a prepared statement with bound parameters would be safer
    # still, but callers expect a ready-to-execute query string.
    query = f"""
    INSERT INTO {table}({NAME_COLUMNS})
    VALUES(
        {values}
    );
    """
    return query
# Open a session on the Cassandra cluster and select the project keyspace.
cluster = Cluster()
session = cluster.connect()
session.set_keyspace("bazinsim_roisinos_metar")

# loadata() is a generator: it yields one parsed record per observation.
# NOTE(review): the name `dict` shadows the builtin; renaming it also requires
# updating the commented-out loader below.
dict = loadata(FILE_NAME)

# # --------------------------------------------------------
# # To be run only once, to load the data
# table_name = "data"
# session.execute(dropTableQuery(table_name))
# print(f"Table {table_name} dropped")
# session.execute(createTableQuery(table_name))
# print(f"Table {table_name} created")
# print(f"Starting inserting datas into table {table_name}")
# for d in dict:
#     session.execute(insertQueryData(d, table_name))
# print(f"Datas inserted into {table_name}")
# # --------------------------------------------------------
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment