Commit b40da489 authored by Tianyang's avatar Tianyang

Little change

parent 3759ed24
No preview for this file type
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -68,10 +68,12 @@ def mapFonction1 (x):
def mapFonction2(x):
    """Turn an accumulated record into ``[x[1] / x[0], x[2], x[3]]``.

    The first output element is the quotient of the second input element by
    the first; the third and fourth input elements are passed through
    unchanged.  Presumably ``x`` is ``[count, sum, min, max]`` so the result
    is ``[mean, min, max]`` -- confirm against mapFonction1.

    Raises ZeroDivisionError when ``x[0]`` is zero.
    """
    quotient = x[1] / x[0]
    return [quotient, x[2], x[3]]
#Test if it is a Nan
def testNan (x):
test = x != x
return test
#Map-reduce function
def mapReduce_mmm(data,timeNB,targetNB):
results = dict()
for row in data.result():
......@@ -95,7 +97,7 @@ def mapReduce_mmm(data,timeNB,targetNB):
results[eachTime] = mapFonction2(results[eachTime])
return results
#Zip the values into 3 lists, one by one
def zipValues (values):
result = [[],[],[]]
for i in range(3):
......@@ -103,7 +105,7 @@ def zipValues (values):
result[i].append(each[i])
return result
#Function 1: the history curve graph
def drawCourbe_history(session,time,target,timeNB,targetNB,espace):
data = session.execute_async("select * from caitiany.database_espace where station = '%s'"%espace )
results = mapReduce_mmm(data,timeNB,targetNB)
......@@ -134,7 +136,8 @@ def drawCourbe_history(session,time,target,timeNB,targetNB,espace):
for label in ax.get_xticklabels()[::6]:
label.set_visible(True)
plt.legend(bbox_to_anchor=(1.0, 1), loc=1, borderaxespad=0.)
plt.savefig("Projet-NF26/test.png")
plt.savefig("Projet-NF26/question1.png")
print ("Generate successfully")
def checkNBvariable (x):
......@@ -150,11 +153,11 @@ def checkNBvariable (x):
def drawCourbe_season(session,season,target,targetNB,espace):
data = session.execute_async("select * from caitiany.database_espace where station = '%s'"%espace )
#We do the same map reduce as fonction 1 by fixing the time as season
results = mapReduce_mmm(data,2,targetNB)
results = seprateSeason(results,season)
keys = list(results.keys())
values = list(results.values())
print(values)
zipped_result = zipValues(values)
for each in keys:
index_each = keys.index(each)
......@@ -170,10 +173,11 @@ def drawCourbe_season(session,season,target,targetNB,espace):
plt.ylabel(target)
plt.grid(True)
plt.legend(bbox_to_anchor=(1.0, 1), loc=1, borderaxespad=0.)
plt.savefig("Projet-NF26/test.png")
plt.savefig("Projet-NF26/question1_season.png")
print ("Generate successfully")
#Choose the data of the season we want
def seprateSeason (results,season):
output = dict()
for each in results:
......@@ -185,13 +189,20 @@ def seprateSeason (results,season):
if __name__ == "__main__":
session = connection()
espace = input("Please enter what station you want to search: ")
#time = input("Per which kind of time: ")
target = input("Which value do you want to check: ")
#timeNB = checkNBvariable(time)
targetNB = checkNBvariable(target)
#drawCourbe_history(session,time,target,timeNB,targetNB,espace)
#Season check
season = input("Please enter which season you want to search: ")
drawCourbe_season(session,season,target,targetNB,espace)
choice = int(input("Which kind of service do you want?\n1.Station history\n2.Check history by seasons\nYour choice: "))
#if choice == 1, we will use the fonction 1
if choice == 1:
espace = input("Please enter which station you want to search [LEBZ,LETO,etc]: ")
time = input("By which kind of time [year,season,month,etc]: ")
target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ")
timeNB = checkNBvariable(time)
targetNB = checkNBvariable(target)
drawCourbe_history(session,time,target,timeNB,targetNB,espace)
else:
#Fonction 2, the check according to the seasons
espace = input("Please enter what station you want to search [LEBZ,LETO,etc]: ")
target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ")
targetNB = checkNBvariable(target)
season = input("Please enter which season you want to search [Spring,Summer,Autumn,Winter]: ")
drawCourbe_season(session,season,target,targetNB,espace)
......@@ -5,6 +5,44 @@ from database_pre3 import connection
import matplotlib.pyplot as plt
import re
import folium
import random
# Known variable names.  checkNBvariable returns the position of a name in
# this list, so the order of the entries must not change.
# NOTE(review): presumably this mirrors the column order of the database
# rows -- confirm against the table schema.
table_variable = [
    'date', 'lon', 'lat', 'station',
    'alti', 'drct', 'dwpf', 'feel', 'gust',
    'ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr',
    'metar', 'mslp', 'p01i',
    'peak_wind_drct', 'peak_wind_gust', 'peak_wind_time',
    'relh', 'sknt',
    'skyc1', 'skyc2', 'skyc3', 'skyc4',
    'skyl1', 'skyl2', 'skyl3', 'skyl4',
    'tmpf', 'vsby', 'wxcodes',
]
def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total
......@@ -96,29 +134,41 @@ def map2_kmeans(x):
def MapnewCentre(x):
    """Return the new centre value ``x[1] / x[0]`` for one cluster entry.

    kmeans stores each cluster as a two-element list described there as
    ``[point count, centre value]`` and passes that list in as ``x``.
    Presumably ``x[1]`` holds the accumulated value of the points assigned
    to the cluster at this stage -- confirm against the reduce step.

    Returns 0 when the count ``x[0]`` is zero, so an empty cluster no longer
    raises ZeroDivisionError.  The earlier unconditional division ran before
    this guard and made it unreachable.
    """
    if x[0] == 0:
        # Empty cluster: nothing to average.
        return 0
    return x[1] / x[0]
def kmeans (data,targetNB):
#3centre with [point count, temprature centre]
centre = {0:[1,0],1:[1,0],2:[1,0]}
def kmeans (data,targetNB,target):
#cluster est pour stocler lat, lon de chaque point de chaque cluster
cluster = [[],[],[]]
result = mapReduce_kmeans(data,targetNB)
#mettre le premier 3 point comme le centres init
init_point_values = [result[i] for i in result.keys()][:3]
init_point_keys = [i for i in result.keys()][:3]
if len(result) < 3:
raise Exception ("We\'ve just searched less than 3 station!!")
#mettre ramdom 3 point comme le centres init
center1 = random.choice(list(result))
center2 = random.choice(list(result))
center3 = random.choice(list(result))
init_point_values = [result[center1],result[center2],result[center3]]
#init_point_values = [result[i] for i in result.keys()][:3]
init_point_keys = [center1,center2,center3]
#3centre with [point count, temprature centre]
centre = {0:[0,0],1:[0,0],2:[0,0]}
centre_new = {0:[0,0],1:[0,0],2:[0,0]}
for key in centre.keys():
centre[key] = [1,init_point_values[key]]
centre_new[key] = [1,init_point_values[key]]
cluster[key].append(init_point_keys[key])
#init the centre new and result new for mapreduce
centre_new = {0:[0,0],1:[0,0],2:[0,0]}
result_new = dict()
#When the number of point of cluster don't change,stop
while True:
......@@ -154,17 +204,20 @@ def kmeans (data,targetNB):
break
else:
#caculate the new centre
print ('jasdlkjalsdkjalskd ',cluster_nb_diff(centre_new,centre))
print ("Cluster number differ: ",cluster_nb_diff(centre_new,centre))
print (centre_new)
for eachculster in centre_new:
centre_new[eachculster][1] = MapnewCentre(centre_new[eachculster])
print("center new: ",centre_new)
print("center old: ",centre)
centre = centre_new
centre_new = {0:[0,0],1:[0,0],2:[0,0]}
result_new = dict()
cluster = [[],[],[]]
createMap(cluster)
createMap(cluster,result)
def createMap (cluster):
def createMap (cluster,result):
mean_lat = 0
count = 0
for each in [cluster[0],cluster[1],cluster[2]]:
......@@ -187,18 +240,33 @@ def createMap (cluster):
i = 0
for each in [cluster[0],cluster[1],cluster[2]]:
for each_pos in each:
label = str(each_pos[2])+"\n"+""+target+":"+str(round(result[(each_pos[0],each_pos[1],each_pos[2])],2))
folium.Marker([each_pos[1],each_pos[0]],
popup=each_pos[2],
popup=label,
icon=folium.Icon(color=color[i])).add_to(m)
i +=1
print(i)
m.save("Projet-NF26/map.html")
print("Generate successfully")
def checkNBvariable(x):
    """Return the position of ``x`` in ``table_variable``.

    The list is scanned in order and the index of the first entry equal to
    ``x`` is returned.  When no entry matches, a message is printed and the
    function falls through, returning None implicitly; callers that use the
    result as an index should check for that.
    """
    for position, name in enumerate(table_variable):
        if x == name:
            return position
    print ('Doesn\'t exist!!')
if __name__ == "__main__":
session = connection()
start = '2008-12-19'
end = '2012-12-14'
start = input("Please enter the start time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
end = input("Please enter the end time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ")
targetNB = checkNBvariable(target)
#start = '2008-12-19'
#end = '2012-12-14'
data = session.execute_async("select * from caitiany.database_kmeans where date >= '%s' and date <= '%s' ALLOW FILTERING"%(start,end))
kmeans(data,4)
kmeans(data,targetNB,target)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment