Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Thomas Meurou
NF26 Project
Commits
f95b3734
Commit
f95b3734
authored
Jun 21, 2019
by
Tianyang
Browse files
Null insert
parent
ca81e57a
Changes
7
Hide whitespace changes
Inline
Side-by-side
.DS_Store
View file @
f95b3734
No preview for this file type
database_pre1.py
View file @
f95b3734
...
...
@@ -124,6 +124,38 @@ def load_data(filename):
#Create the query depending on whether each column's value is null or not
def createQuery(data):
    """Build a parameterized INSERT for ``database_espace`` from one row.

    Columns whose value stringifies to ``'nan'`` are left out of both the
    column list and the value tuple.

    Args:
        data: Mapping of column name to value for a single row.

    Returns:
        A ``(query, values)`` tuple: the INSERT statement with one ``%s``
        placeholder per kept column, and a tuple of the matching values in
        the same order.

    Raises:
        ValueError: If no column is left after filtering (``data`` is empty
            or every value is ``'nan'``), since an INSERT without columns
            would be malformed.
    """
    # str() also matches float NaN, so one comparison covers both the text
    # "nan" and a real NaN value.
    kept = {column: data[column] for column in data if str(data[column]) != 'nan'}
    if not kept:
        raise ValueError("createQuery: no non-null column to insert")

    # Join the pieces instead of appending and trimming trailing commas.
    columns = ",".join(str(column) for column in kept)
    placeholders = ",".join(["%s"] * len(kept))
    query = "INSERT INTO database_espace(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(kept.values())
def
insection_sql_Q1
(
filename
,
session
):
target
=
load_data
(
filename
)
i
=
1
...
...
@@ -131,90 +163,12 @@ def insection_sql_Q1(filename,session):
i
+=
1
if
(
i
%
500
==
0
):
print
(
"500 finished....."
)
ligne
=
(
data
[
"station"
],
data
[
"year"
],
data
[
"season"
],
data
[
"month"
],
data
[
"day"
],
data
[
"hour"
],
data
[
"minute"
],
data
[
"lon"
],
data
[
"lat"
],
data
[
"tmpf"
],
data
[
"dwpf"
],
data
[
"relh"
],
data
[
"drct"
],
data
[
"sknt"
],
data
[
"p01i"
],
data
[
"alti"
],
data
[
"mslp"
],
data
[
"vsby"
],
data
[
"gust"
],
data
[
"skyc1"
],
data
[
"skyc2"
],
data
[
"skyc3"
],
data
[
"skyc4"
],
data
[
"skyl1"
],
data
[
"skyl2"
],
data
[
"skyl3"
],
data
[
"skyl4"
],
data
[
"wxcodes"
],
data
[
"ice_accretion_1hr"
],
data
[
"ice_accretion_3hr"
],
data
[
"ice_accretion_6hr"
],
data
[
"peak_wind_gust"
],
data
[
"peak_wind_drct"
],
data
[
"peak_wind_time"
],
data
[
"feel"
],
data
[
"metar"
])
query
=
"""
INSERT INTO database_espace(
station,
year,
season,
month,
day,
hour,
minute,
lon,
lat,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query
,
ligne
=
createQuery
(
data
)
session
.
execute
(
query
,
ligne
)
if
__name__
==
"__main__"
:
session
=
connection
()
databaseCreate_Q1
(
session
)
...
...
database_pre2.py
View file @
f95b3734
...
...
@@ -99,6 +99,34 @@ def load_data(filename):
yield
data
#Create the query depending on whether each column's value is null or not
def createQuery(data):
    """Build a parameterized INSERT for ``database_time`` from one row.

    Columns whose value stringifies to ``'nan'`` are left out of both the
    column list and the value tuple.

    Args:
        data: Mapping of column name to value for a single row.

    Returns:
        A ``(query, values)`` tuple: the INSERT statement with one ``%s``
        placeholder per kept column, and a tuple of the matching values in
        the same order.

    Raises:
        ValueError: If no column is left after filtering (``data`` is empty
            or every value is ``'nan'``), since an INSERT without columns
            would be malformed.
    """
    # str() also matches float NaN, so one comparison covers both the text
    # "nan" and a real NaN value.
    kept = {column: data[column] for column in data if str(data[column]) != 'nan'}
    if not kept:
        raise ValueError("createQuery: no non-null column to insert")

    # Join the pieces instead of appending and trimming trailing commas.
    columns = ",".join(str(column) for column in kept)
    placeholders = ",".join(["%s"] * len(kept))
    query = "INSERT INTO database_time(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(kept.values())
def
insertion_sql_Q2
(
filename
,
session
):
target
=
load_data
(
filename
)
...
...
@@ -109,81 +137,10 @@ def insertion_sql_Q2(filename,session):
if
(
i
%
500
==
0
):
k
+=
1
print
(
k
,
". 500 finished....."
)
ligne
=
(
data
[
"date"
],
data
[
"lon"
],
data
[
"lat"
],
data
[
"station"
],
data
[
"tmpf"
],
data
[
"dwpf"
],
data
[
"relh"
],
data
[
"drct"
],
data
[
"sknt"
],
data
[
"p01i"
],
data
[
"alti"
],
data
[
"mslp"
],
data
[
"vsby"
],
data
[
"gust"
],
data
[
"skyc1"
],
data
[
"skyc2"
],
data
[
"skyc3"
],
data
[
"skyc4"
],
data
[
"skyl1"
],
data
[
"skyl2"
],
data
[
"skyl3"
],
data
[
"skyl4"
],
data
[
"wxcodes"
],
data
[
"ice_accretion_1hr"
],
data
[
"ice_accretion_3hr"
],
data
[
"ice_accretion_6hr"
],
data
[
"peak_wind_gust"
],
data
[
"peak_wind_drct"
],
data
[
"peak_wind_time"
],
data
[
"feel"
],
data
[
"metar"
])
query
=
"""
INSERT INTO database_kmeans(
date,
lon,
lat,
station,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query
,
ligne
=
createQuery
(
data
)
session
.
execute
(
query
,
ligne
)
if
__name__
==
"__main__"
:
session
=
connection
()
databaseCreate_Q2
(
session
)
...
...
database_pre3.py
View file @
f95b3734
...
...
@@ -100,6 +100,41 @@ def load_data(filename):
#Create the query depending on whether each column's value is null or not
def createQuery(data):
    """Build a parameterized INSERT for ``database_kmeans`` from one row.

    Columns whose value stringifies to ``'nan'`` are left out of both the
    column list and the value tuple.

    Args:
        data: Mapping of column name to value for a single row.

    Returns:
        A ``(query, values)`` tuple: the INSERT statement with one ``%s``
        placeholder per kept column, and a tuple of the matching values in
        the same order.

    Raises:
        ValueError: If no column is left after filtering (``data`` is empty
            or every value is ``'nan'``), since an INSERT without columns
            would be malformed.
    """
    # str() also matches float NaN, so one comparison covers both the text
    # "nan" and a real NaN value.
    kept = {column: data[column] for column in data if str(data[column]) != 'nan'}
    if not kept:
        raise ValueError("createQuery: no non-null column to insert")

    # Join the pieces instead of appending and trimming trailing commas.
    columns = ",".join(str(column) for column in kept)
    placeholders = ",".join(["%s"] * len(kept))
    query = "INSERT INTO database_kmeans(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(kept.values())
def
insection_sql_Q3
(
filename
,
session
):
target
=
load_data
(
filename
)
i
=
1
...
...
@@ -109,75 +144,7 @@ def insection_sql_Q3(filename,session):
if
(
i
%
500
==
0
):
k
+=
1
print
(
k
,
". 500 finished....."
)
ligne
=
(
data
[
"date"
],
data
[
"lon"
],
data
[
"lat"
],
data
[
"station"
],
data
[
"tmpf"
],
data
[
"dwpf"
],
data
[
"relh"
],
data
[
"drct"
],
data
[
"sknt"
],
data
[
"p01i"
],
data
[
"alti"
],
data
[
"mslp"
],
data
[
"vsby"
],
data
[
"gust"
],
data
[
"skyc1"
],
data
[
"skyc2"
],
data
[
"skyc3"
],
data
[
"skyc4"
],
data
[
"skyl1"
],
data
[
"skyl2"
],
data
[
"skyl3"
],
data
[
"skyl4"
],
data
[
"wxcodes"
],
data
[
"ice_accretion_1hr"
],
data
[
"ice_accretion_3hr"
],
data
[
"ice_accretion_6hr"
],
data
[
"peak_wind_gust"
],
data
[
"peak_wind_drct"
],
data
[
"peak_wind_time"
],
data
[
"feel"
],
data
[
"metar"
])
query
=
"""
INSERT INTO database_kmeans(
date,
lon,
lat,
station,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query
,
ligne
=
createQuery
(
data
)
session
.
execute
(
query
,
ligne
)
...
...
question1.py
View file @
f95b3734
...
...
@@ -68,10 +68,6 @@ def mapFonction1 (x):
def mapFonction2(x):
    """Return ``[x[1] / x[0], x[2], x[3]]``.

    The first two items of ``x`` are collapsed into their quotient; the
    third and fourth items are passed through unchanged.
    """
    numerator = x[1]
    denominator = x[0]
    quotient = numerator / denominator
    return [quotient, x[2], x[3]]
#Test if it is a Nan
def
testNan
(
x
):
test
=
x
!=
x
return
test
#Map reduce fonction
def
mapReduce_mmm
(
data
,
timeNB
,
targetNB
):
...
...
@@ -86,7 +82,7 @@ def mapReduce_mmm(data,timeNB,targetNB):
else
:
assert
1
==
2
,
"Doesn
\'
t exits!"
data_target
=
row
[
targetNB
]
if
testNan
(
data_time
)
or
testNan
(
data_target
):
if
str
(
data_time
)
==
'null'
or
str
(
data_target
)
==
'null'
:
continue
if
results
.
get
(
data_time
)
is
None
:
results
[
data_time
]
=
mapFonction1
(
data_target
)
...
...
question2.py
View file @
f95b3734
...
...
@@ -30,7 +30,7 @@ def createMap(data):
for
each
in
data
.
result
():
# print(each)
# Here we choose not to display the "nan" values and the METAR ID
l
=
[
attributes
[
i
]
+
":"
+
str
(
each
[
i
+
4
])
for
i
in
range
(
len
(
attributes
))
if
str
(
each
[
i
+
4
])
!=
'n
an
'
l
=
[
attributes
[
i
]
+
":"
+
str
(
each
[
i
+
4
])
for
i
in
range
(
len
(
attributes
))
if
str
(
each
[
i
+
4
])
!=
'n
ull
'
and
attributes
[
i
]
!=
"metar"
]
string
=
'
\n
'
.
join
(
l
)
folium
.
Marker
([
each
[
2
],
each
[
1
]],
...
...
question3.py
View file @
f95b3734
...
...
@@ -72,17 +72,13 @@ def mapFonction1 (x):
def mapFonction2(x):
    """Return the quotient ``x[1] / x[0]``."""
    numerator = x[1]
    denominator = x[0]
    return numerator / denominator
#Test if it is a NaN
def
testNan
(
x
):
test
=
x
!=
x
return
test
#Map reduce to calculate the means of each station
def
mapReduce_kmeans
(
data
,
targetNB
):
results
=
dict
()
for
row
in
data
.
result
():
data_target
=
row
[
targetNB
]
if
testNan
(
data_target
):
if
str
(
data_target
)
==
'null'
:
continue
data_espace
=
(
row
[
1
],
row
[
2
],
row
[
3
])
if
results
.
get
(
data_espace
)
is
None
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment