Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Thomas Meurou
NF26 Project
Commits
a9a2dd8f
Commit
a9a2dd8f
authored
Jun 18, 2019
by
Unknown
Browse files
Add loading database script for question 2
parent
3759ed24
Changes
1
Hide whitespace changes
Inline
Side-by-side
database_pre2.py
0 → 100644
View file @
a9a2dd8f
import
cassandra.cluster
import
csv
import
re
def connection():
    """Open a Cassandra session bound to the ``meurouth_cql`` keyspace.

    The cluster is contacted through the single contact point ``localhost``.

    Returns:
        The session object returned by ``Cluster.connect``.
    """
    # Function-scope import: keeps this helper usable on its own.
    import cassandra.cluster

    return cassandra.cluster.Cluster(['localhost']).connect('meurouth_cql')
def databaseCreate_Q2(session):
    """Create the ``database_time`` table and report it on stdout.

    The table is partitioned by ``date`` and clustered by ``lon``, ``lat``
    and ``station``; every other column is a plain ``float`` or ``varchar``.

    Args:
        session: object exposing ``execute(query)``; the CQL statement below
            is passed to it unchanged.
    """
    create_table_cql = """
CREATE TABLE database_time (
date timestamp,
lon float,
lat float,
station varchar,
tmpf float,
dwpf float,
relh float,
drct float,
sknt float,
p01i float,
alti float,
mslp float,
vsby float,
gust float,
skyc1 varchar,
skyc2 varchar,
skyc3 varchar,
skyc4 varchar,
skyl1 float,
skyl2 float,
skyl3 float,
skyl4 float,
wxcodes varchar,
ice_accretion_1hr float,
ice_accretion_3hr float,
ice_accretion_6hr float,
peak_wind_gust float,
peak_wind_drct float,
peak_wind_time varchar,
feel float,
metar varchar,
PRIMARY KEY ((date),lon,lat,station)
)"""
    session.execute(create_table_cql)
    print("DATA BASE database_time created!")
def load_data(filename):
    """Stream the rows of a CSV file as dictionaries ready for insertion.

    The file must have a header row containing ``valid`` plus every column
    listed in ``columns`` below. For each data row:

    * rows whose ``valid`` field is empty are skipped;
    * any field equal to the literal ``"M"`` is replaced by ``"nan"``, so a
      numeric column becomes ``float("nan")`` and a text column holds the
      string ``"nan"``;
    * ``valid`` is emitted unchanged under the key ``"date"``;
    * numeric columns are converted with ``float``; text columns are passed
      through as read.

    Args:
        filename: path of the CSV file to read.

    Yields:
        dict: one mapping per kept row, keyed by ``"date"`` and the names in
        ``columns``.

    Raises:
        KeyError: if an expected column is absent from the header.
        ValueError: if a numeric field is neither ``"M"`` nor parseable by
            ``float``.
    """
    # Output columns, in the order they are converted and emitted.
    columns = (
        "station", "lon", "lat", "tmpf", "dwpf", "relh", "drct", "sknt",
        "p01i", "alti", "mslp", "vsby", "gust",
        "skyc1", "skyc2", "skyc3", "skyc4",
        "skyl1", "skyl2", "skyl3", "skyl4",
        "wxcodes",
        "ice_accretion_1hr", "ice_accretion_3hr", "ice_accretion_6hr",
        "peak_wind_gust", "peak_wind_drct", "peak_wind_time",
        "feel", "metar",
    )
    # Columns that are not parsed as floats.
    text_columns = frozenset((
        "station", "skyc1", "skyc2", "skyc3", "skyc4",
        "wxcodes", "peak_wind_time", "metar",
    ))

    # newline="" is what the csv module requires: without it, universal
    # newline translation rewrites "\r\n" inside quoted fields before the
    # parser sees them.
    with open(filename, newline="") as f:
        for r in csv.DictReader(f):
            if not r["valid"]:
                continue
            # Substitute the "M" marker without mutating the reader's row.
            cleaned = {
                key: ("nan" if value == "M" else value)
                for key, value in r.items()
            }
            data = {"date": cleaned["valid"]}
            for name in columns:
                value = cleaned[name]
                data[name] = value if name in text_columns else float(value)
            yield data
def insertion_sql_Q2(filename, session):
    """Insert every row produced by ``load_data(filename)`` into ``database_time``.

    One INSERT is executed per row, and a progress line is printed after
    each batch of 500 inserted rows.

    Args:
        filename: path of the CSV file, forwarded to ``load_data``.
        session: object exposing ``execute(query, parameters)``; the query
            uses ``%s`` placeholders and the parameters are passed as a
            tuple.
    """
    # Column order shared by the INSERT column list and the value tuple.
    columns = (
        "date", "lon", "lat", "station",
        "tmpf", "dwpf", "relh", "drct", "sknt", "p01i", "alti", "mslp",
        "vsby", "gust",
        "skyc1", "skyc2", "skyc3", "skyc4",
        "skyl1", "skyl2", "skyl3", "skyl4",
        "wxcodes",
        "ice_accretion_1hr", "ice_accretion_3hr", "ice_accretion_6hr",
        "peak_wind_gust", "peak_wind_drct", "peak_wind_time",
        "feel", "metar",
    )
    # Built once, outside the loop. The target is database_time, the table
    # databaseCreate_Q2 creates with exactly these columns; the previous
    # text inserted into database_kmeans, which this script never creates.
    query = "INSERT INTO database_time({}) VALUES({})".format(
        ", ".join(columns),
        ",".join(["%s"] * len(columns)),
    )

    for count, data in enumerate(load_data(filename), start=1):
        ligne = tuple(data[name] for name in columns)
        session.execute(query, ligne)
        # Report after the row is written, with a running batch number.
        # The old counter was reset on every iteration and always printed 1.
        if count % 500 == 0:
            print(count // 500, ". 500 finished.....")
if __name__ == "__main__":
    # Script entry point: connect, create the table, then load the CSV.
    csv_path = "Projet-NF26/data.csv"
    session = connection()
    databaseCreate_Q2(session)
    insertion_sql_Q2(csv_path, session)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment