Commit f95b3734 authored by Tianyang's avatar Tianyang

Null insert

parent ca81e57a
No preview for this file type
......@@ -124,6 +124,38 @@ def load_data(filename):
#Create the query according to whether each column's value is null or not
def createQuery(data, table="database_espace"):
    """Build a parameterized INSERT statement that leaves out "nan" values.

    Parameters
    ----------
    data : mapping
        One row, as column name -> value. It is iterated with
        ``for column in data`` and indexed with ``data[column]``.
    table : str, optional
        Name of the table to insert into. Defaults to ``"database_espace"``.

    Returns
    -------
    tuple
        ``(query, values)``: ``query`` is the INSERT statement with one
        ``%s`` placeholder per kept column, ``values`` is the tuple of the
        kept values in the same order as the column list.
    """
    # str() catches both the literal string "nan" and a float NaN, whose
    # string form is also "nan"; such columns are simply not inserted.
    kept = []
    for column in data:
        value = data[column]
        if str(value) != 'nan':
            kept.append((column, value))

    # Joining (instead of appending "," and chopping the last character)
    # keeps the statement well-formed even when no column is kept.
    columns = ",".join(str(column) for column, _ in kept)
    placeholders = ",".join(["%s"] * len(kept))

    # Only identifiers are interpolated; the values travel separately as
    # bound parameters.
    query = "INSERT INTO " + table + "(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(value for _, value in kept)
def insection_sql_Q1(filename,session):
target = load_data(filename)
i = 1
......@@ -131,90 +163,12 @@ def insection_sql_Q1(filename,session):
i += 1
if (i % 500 == 0):
print("500 finished.....")
ligne = (data["station"],
data["year"],
data["season"],
data["month"],
data["day"],
data["hour"],
data["minute"],
data["lon"],
data["lat"],
data["tmpf"],
data["dwpf"],
data["relh"],
data["drct"],
data["sknt"],
data["p01i"],
data["alti"],
data["mslp"],
data["vsby"],
data["gust"],
data["skyc1"],
data["skyc2"],
data["skyc3"],
data["skyc4"],
data["skyl1"],
data["skyl2"],
data["skyl3"],
data["skyl4"],
data["wxcodes"],
data["ice_accretion_1hr"],
data["ice_accretion_3hr"],
data["ice_accretion_6hr"],
data["peak_wind_gust"],
data["peak_wind_drct"],
data["peak_wind_time"],
data["feel"],
data["metar"])
query = """
INSERT INTO database_espace(
station,
year,
season,
month,
day,
hour,
minute,
lon,
lat,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query, ligne = createQuery(data)
session.execute(query, ligne)
if __name__ == "__main__":
session = connection()
databaseCreate_Q1(session)
......
......@@ -99,6 +99,34 @@ def load_data(filename):
yield data
#Create the query according to whether each column's value is null or not
def createQuery(data, table="database_time"):
    """Build a parameterized INSERT statement that leaves out "nan" values.

    Parameters
    ----------
    data : mapping
        One row, as column name -> value. It is iterated with
        ``for column in data`` and indexed with ``data[column]``.
    table : str, optional
        Name of the table to insert into. Defaults to ``"database_time"``.

    Returns
    -------
    tuple
        ``(query, values)``: ``query`` is the INSERT statement with one
        ``%s`` placeholder per kept column, ``values`` is the tuple of the
        kept values in the same order as the column list.
    """
    # str() catches both the literal string "nan" and a float NaN, whose
    # string form is also "nan"; such columns are simply not inserted.
    kept = []
    for column in data:
        value = data[column]
        if str(value) != 'nan':
            kept.append((column, value))

    # Joining (instead of appending "," and chopping the last character)
    # keeps the statement well-formed even when no column is kept.
    columns = ",".join(str(column) for column, _ in kept)
    placeholders = ",".join(["%s"] * len(kept))

    # Only identifiers are interpolated; the values travel separately as
    # bound parameters.
    query = "INSERT INTO " + table + "(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(value for _, value in kept)
def insertion_sql_Q2(filename,session):
target = load_data(filename)
......@@ -109,81 +137,10 @@ def insertion_sql_Q2(filename,session):
if (i % 500 == 0):
k += 1
print(k,". 500 finished.....")
ligne = (
data["date"],
data["lon"],
data["lat"],
data["station"],
data["tmpf"],
data["dwpf"],
data["relh"],
data["drct"],
data["sknt"],
data["p01i"],
data["alti"],
data["mslp"],
data["vsby"],
data["gust"],
data["skyc1"],
data["skyc2"],
data["skyc3"],
data["skyc4"],
data["skyl1"],
data["skyl2"],
data["skyl3"],
data["skyl4"],
data["wxcodes"],
data["ice_accretion_1hr"],
data["ice_accretion_3hr"],
data["ice_accretion_6hr"],
data["peak_wind_gust"],
data["peak_wind_drct"],
data["peak_wind_time"],
data["feel"],
data["metar"])
query = """
INSERT INTO database_kmeans(
date,
lon,
lat,
station,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query, ligne = createQuery(data)
session.execute(query, ligne)
if __name__ == "__main__":
session = connection()
databaseCreate_Q2(session)
......
......@@ -100,6 +100,41 @@ def load_data(filename):
#Create the query according to whether each column's value is null or not
def createQuery(data, table="database_kmeans"):
    """Build a parameterized INSERT statement that leaves out "nan" values.

    Parameters
    ----------
    data : mapping
        One row, as column name -> value. It is iterated with
        ``for column in data`` and indexed with ``data[column]``.
    table : str, optional
        Name of the table to insert into. Defaults to ``"database_kmeans"``.

    Returns
    -------
    tuple
        ``(query, values)``: ``query`` is the INSERT statement with one
        ``%s`` placeholder per kept column, ``values`` is the tuple of the
        kept values in the same order as the column list.
    """
    # str() catches both the literal string "nan" and a float NaN, whose
    # string form is also "nan"; such columns are simply not inserted.
    kept = []
    for column in data:
        value = data[column]
        if str(value) != 'nan':
            kept.append((column, value))

    # Joining (instead of appending "," and chopping the last character)
    # keeps the statement well-formed even when no column is kept.
    columns = ",".join(str(column) for column, _ in kept)
    placeholders = ",".join(["%s"] * len(kept))

    # Only identifiers are interpolated; the values travel separately as
    # bound parameters.
    query = "INSERT INTO " + table + "(" + columns + ") VALUES(" + placeholders + ");"
    return query, tuple(value for _, value in kept)
def insection_sql_Q3(filename,session):
target = load_data(filename)
i = 1
......@@ -109,75 +144,7 @@ def insection_sql_Q3(filename,session):
if (i % 500 == 0):
k += 1
print(k,". 500 finished.....")
ligne = (
data["date"],
data["lon"],
data["lat"],
data["station"],
data["tmpf"],
data["dwpf"],
data["relh"],
data["drct"],
data["sknt"],
data["p01i"],
data["alti"],
data["mslp"],
data["vsby"],
data["gust"],
data["skyc1"],
data["skyc2"],
data["skyc3"],
data["skyc4"],
data["skyl1"],
data["skyl2"],
data["skyl3"],
data["skyl4"],
data["wxcodes"],
data["ice_accretion_1hr"],
data["ice_accretion_3hr"],
data["ice_accretion_6hr"],
data["peak_wind_gust"],
data["peak_wind_drct"],
data["peak_wind_time"],
data["feel"],
data["metar"])
query = """
INSERT INTO database_kmeans(
date,
lon,
lat,
station,
tmpf,
dwpf,
relh,
drct,
sknt,
p01i,
alti,
mslp,
vsby,
gust,
skyc1,
skyc2,
skyc3,
skyc4,
skyl1,
skyl2,
skyl3,
skyl4,
wxcodes,
ice_accretion_1hr,
ice_accretion_3hr,
ice_accretion_6hr,
peak_wind_gust,
peak_wind_drct,
peak_wind_time,
feel,
metar)
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""
query, ligne = createQuery(data)
session.execute(query, ligne)
......
......@@ -68,10 +68,6 @@ def mapFonction1 (x):
def mapFonction2(x):
    """Return ``[x[1] / x[0], x[2], x[3]]``.

    The first two entries are collapsed into their quotient; the third and
    fourth are passed through unchanged.
    NOTE(review): presumably x is an accumulator of the form
    [count, sum, ..., ...] so the quotient is a mean - confirm against
    mapFonction1.
    """
    quotient = x[1] / x[0]
    return [quotient, x[2], x[3]]
#Test if it is a Nan
def testNan (x):
test = x != x
return test
#Map reduce function
def mapReduce_mmm(data,timeNB,targetNB):
......@@ -86,7 +82,7 @@ def mapReduce_mmm(data,timeNB,targetNB):
else:
assert 1==2, "Doesn\'t exits!"
data_target = row[targetNB]
if testNan(data_time) or testNan(data_target):
if str(data_time) == 'null' or str(data_target) == 'null':
continue
if results.get(data_time) is None:
results[data_time] = mapFonction1(data_target)
......
......@@ -30,7 +30,7 @@ def createMap(data):
for each in data.result():
# print(each)
# Here we choose not to display the "nan" values and the METAR ID
l = [attributes[i] + ":" + str(each[i + 4]) for i in range(len(attributes)) if str(each[i + 4]) != 'nan'
l = [attributes[i] + ":" + str(each[i + 4]) for i in range(len(attributes)) if str(each[i + 4]) != 'null'
and attributes[i] != "metar"]
string='\n'.join(l)
folium.Marker([each[2],each[1]],
......
......@@ -72,17 +72,13 @@ def mapFonction1 (x):
def mapFonction2(x):
    """Return the quotient ``x[1] / x[0]``.

    NOTE(review): presumably x is a [count, sum] accumulator so the result
    is a mean - confirm against mapFonction1.
    """
    numerator = x[1]
    denominator = x[0]
    return numerator / denominator
#Test if it is a NaN
def testNan (x):
test = x != x
return test
#Map reduce to calculate the means of each station
def mapReduce_kmeans(data,targetNB):
results = dict()
for row in data.result():
data_target = row[targetNB]
if testNan(data_target):
if str(data_target) == 'null':
continue
data_espace = (row[1],row[2],row[3])
if results.get(data_espace) is None:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment