Commit a929da85 authored by Unknown

Add data treatment for question 2

parent 848d0f09
......@@ -143,7 +143,7 @@ def insertion_sql_Q2(filename,session):
data["metar"])
query = """
INSERT INTO database_kmeans(
INSERT INTO database_time(
date,
lon,
lat,
......
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
from database_pre2 import connection
import matplotlib.pyplot as plt
import re
import folium
def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total
def abs_diff(x, y):
    """Return the absolute value of ``x - y``."""
    delta = x - y
    return abs(delta)
def diff(x, y):
    """Return the signed difference ``x - y``."""
    difference = x - y
    return difference
# Reduce step used when averaging: merge two partial aggregates.
# input [count, sum] and [count, sum] -> [count, sum]
def reduceFonction(x, y):
    """Add two partial aggregates position by position.

    Only the first two positions of each argument are read; the result is
    always a new two-element list ``[x[0] + y[0], x[1] + y[1]]``.

    The second slot is an accumulated sum rather than a mean: elsewhere in
    this file the pair is turned into a mean by dividing slot 1 by slot 0.

    Args:
        x: Indexable with at least two addable items.
        y: Indexable with at least two addable items.

    Returns:
        A new list holding the two pairwise sums.
    """
    # A plain ``+`` replaces the former reduce() over a two-element list,
    # which performed exactly one addition per slot.
    return [x[i] + y[i] for i in range(2)]
# input value -> [1, value]
def mapFonction1(x):
    """Wrap a single value as the two-element list ``[1, x]``."""
    pair = [1, x]
    return pair
# input [count, total] -> total / count
def mapFonction2(x):
    """Return ``x[1]`` divided by ``x[0]``."""
    numerator = x[1]
    denominator = x[0]
    return numerator / denominator
def testNan (x):
test = x != x
return test
def mapReduce_kmeans(data, targetNB):
    """Average one column per ``(row[1], row[2], row[3])`` key.

    Rows are read from ``data.result()``. A row is ignored when ``testNan``
    reports its target value as NaN. The remaining values are folded into a
    per-key aggregate with ``mapFonction1`` / ``reduceFonction`` and finally
    converted with ``mapFonction2``.

    Args:
        data: Object exposing ``result()`` that returns an iterable of
            indexable rows.
        targetNB: Index of the column to aggregate.

    Returns:
        dict mapping each ``(row[1], row[2], row[3])`` tuple to the value
        produced by ``mapFonction2`` for that key.
    """
    aggregates = {}
    for record in data.result():
        value = record[targetNB]
        if testNan(value):
            continue
        location = (record[1], record[2], record[3])
        mapped = mapFonction1(value)
        previous = aggregates.get(location)
        if previous is None:
            aggregates[location] = mapped
        else:
            aggregates[location] = reduceFonction(mapped, previous)
    return {
        location: mapFonction2(aggregate)
        for location, aggregate in aggregates.items()
    }
def cluster_nb_diff(centre_new, centre):
    """Return the mean absolute change of slot 0 over clusters 0, 1 and 2.

    For each index ``i`` in ``0..2`` the absolute difference between
    ``centre_new[i][0]`` and ``centre[i][0]`` is taken; the three differences
    are summed and divided by 3.

    Args:
        centre_new: Mapping/sequence indexable by 0, 1, 2 whose items are
            indexable with a numeric first element.
        centre: Same layout as ``centre_new``.

    Returns:
        The summed absolute differences divided by 3.
    """
    # Use the builtin sum() instead of shadowing it with a local accumulator.
    total = sum(abs(centre_new[i][0] - centre[i][0]) for i in range(3))
    return total / 3
# input value -> [value, value, value, value]
def map1_kmeans(x):
    """Return a four-element list in which every item is ``x``."""
    return [x] * 4
def mapCentre(x):
    """Return ``[x[0], x[1], x[2], 0]``: the first three items plus a 0."""
    padded = [x[i] for i in range(3)]
    padded.append(0)
    return padded
# input [v, v, v, v] and [c1, c2, c3, 0] -> [|v - c1|, |v - c2|, |v - c3|, v]
def reduceKmeans(x, y):
    """Combine a replicated value with a centre vector.

    The first three slots become absolute differences ``|x[i] - y[i]|``.
    The fourth slot is the signed difference ``x[3] - y[3]``, so with the
    documented ``y[3] == 0`` the original value is carried through intact.

    Previously the fourth slot also went through an absolute difference,
    which turned a negative value ``v`` into ``|v|`` and contradicted the
    documented output layout.

    Args:
        x: Indexable with at least four numeric items.
        y: Indexable with at least four numeric items.

    Returns:
        A new four-element list as described above.
    """
    distances = [abs(x[i] - y[i]) for i in range(3)]
    # Signed on purpose: this slot transports the value, not a distance.
    distances.append(x[3] - y[3])
    return distances
# input [d1, d2, d3, v] -> [index of smallest d, smallest d, v]
def map2_kmeans(x):
    """Pick the smallest of the first three items of ``x``.

    Ties are resolved in favour of the lowest index because only a strictly
    smaller candidate replaces the current minimum.

    Args:
        x: Indexable with at least four items; the first three must be
            comparable with floats.

    Returns:
        ``[index, min_value, x[3]]`` where ``index`` is in ``0..2`` and
        ``x[3]`` is returned unchanged.
    """
    # Start from +inf rather than a large finite constant, so the result is
    # correct whatever the magnitude of the distances.
    min_value = float("inf")
    index = 0
    for candidate in range(3):
        if x[candidate] < min_value:
            min_value = x[candidate]
            index = candidate
    return [index, min_value, x[3]]
def MapnewCentre(x):
    """Return ``x[1] / x[0]``."""
    accumulated = x[1]
    members = x[0]
    return accumulated / members
def kmeans(data, targetNB):
    """Group the per-key averages of one column into three clusters.

    The column is first averaged per ``(row[1], row[2], row[3])`` key with
    ``mapReduce_kmeans``. The first three keys seed the three centres. Each
    pass assigns every key to the nearest centre (one-dimensional absolute
    distance), then recomputes each centre as the mean of its members. The
    loop stops once ``cluster_nb_diff`` reports that the cluster sizes
    changed by at most 1 on average between two passes.

    As in the original algorithm, the three seed keys are not re-assigned
    during the first pass; they stay in the cluster they seeded.

    Args:
        data: Object exposing ``result()`` that returns an iterable of
            indexable rows (forwarded to ``mapReduce_kmeans``).
        targetNB: Index of the column to cluster on.

    Returns:
        A list of three lists; list ``k`` holds the keys assigned to
        cluster ``k`` in the final pass.

    Raises:
        ValueError: If fewer than three distinct keys carry a usable value
            (this used to surface as an ``IndexError`` while seeding).
    """
    nb_clusters = 3  # cluster_nb_diff and map2_kmeans are written for 3

    result = mapReduce_kmeans(data, targetNB)
    if len(result) < nb_clusters:
        raise ValueError(
            "kmeans needs at least %d distinct points, got %d"
            % (nb_clusters, len(result))
        )

    # Use the first three points as the initial centres.
    # centre[k] is [member count, centre value].
    seed_keys = list(result)[:nb_clusters]
    centre = {k: [1, result[seed_keys[k]]] for k in range(nb_clusters)}
    # cluster[k] stores the keys of the points belonging to cluster k.
    cluster = [[seed_keys[k]] for k in range(nb_clusters)]

    while True:
        # The centre vector is the same for every point of a pass.
        centre_values = mapCentre([centre[k][1] for k in centre])
        # Only non-empty during the first pass (the seeds).
        already_placed = set().union(*cluster)

        # Map / reduce / map: value -> [cluster number, distance, value].
        result_new = {}
        for eachkey, value in result.items():
            if eachkey in already_placed:
                continue
            distances = reduceKmeans(map1_kmeans(value), centre_values)
            result_new[eachkey] = map2_kmeans(distances)

        # Accumulate member count and value sum per cluster.
        centre_new = {k: [0, 0] for k in range(nb_clusters)}
        for eachpoint, assignment in result_new.items():
            clusterNB = assignment[0]
            centre_new[clusterNB][0] += 1
            # Sum the aggregated value itself so negative values keep their
            # sign regardless of what travelled through the distance vector.
            centre_new[clusterNB][1] += result[eachpoint]
            cluster[clusterNB].append(eachpoint)

        # Stop when the cluster sizes are (almost) stable.
        if cluster_nb_diff(centre_new, centre) <= 1:
            break

        # Turn the sums into the new centres.
        for k in centre_new:
            if centre_new[k][0] == 0:
                # An empty cluster keeps its previous centre instead of
                # dividing by zero.
                centre_new[k][1] = centre[k][1]
            else:
                centre_new[k][1] = MapnewCentre(centre_new[k])
        centre = centre_new
        cluster = [[] for _ in range(nb_clusters)]

    # NOTE(review): this function used to finish with createMap(cluster),
    # but createMap reads rows through ``.result()`` and cannot consume a
    # list of keys, so that call always failed. The clusters are returned
    # so the caller can decide how to display them.
    return cluster
def createMap(data):
    """Draw one red marker per row on a folium map and save it as HTML.

    The map is centred on the mean of columns 2 and 1 of all rows and is
    written to ``Projet-NF26/map.html``. Each marker's popup lists the
    attribute columns (starting at column 4) whose string form is not
    ``'nan'``, leaving out ``metar``. When there are no rows a message is
    printed and nothing is written.

    Args:
        data: Object exposing ``result()`` that returns an iterable of
            indexable rows.

    Returns:
        None.
    """
    # Materialise the rows once: the data is traversed twice below and a
    # one-shot or paged result would be empty or partial the second time.
    rows = list(data.result())
    if not rows:
        print('No data available at this timestamp !')
        return

    count = len(rows)
    # Presumably column 1 is the longitude and column 2 the latitude, since
    # folium takes [lat, lon] and column 2 is passed first -- TODO confirm
    # against the table schema.
    mean_col1 = sum(row[1] for row in rows) / count
    mean_col2 = sum(row[2] for row in rows) / count
    m = folium.Map(location=[mean_col2, mean_col1], zoom_start=6)

    # Attribute names, in the order of the row columns from index 4 onwards.
    attributes = ["alti", "drct", "dwpf", "feel", "gust", "ice_accretion_1hr", "ice_accretion_3hr", "ice_accretion_6hr",
                  "metar", "mslp", "p01i", "peak_wind_drct", "peak_wind_gust", "peak_wind_time", "relh", "sknt",
                  "skyc1", "skyc2", "skyc3", "skyc4", "skyl1", "skyl2", "skyl3", "skyl4", "tmpf", "vsby", "wxcodes"]
    for row in rows:
        lines = [
            name + ":" + str(row[i + 4])
            for i, name in enumerate(attributes)
            if str(row[i + 4]) != 'nan' and name != "metar"
        ]
        string = '\n'.join(lines)
        folium.Marker([row[2], row[1]],
                      popup=string,
                      icon=folium.Icon(color='red')).add_to(m)
    m.save("Projet-NF26/map.html")
# Script entry point: fetch every row stored for one timestamp and map it.
if __name__ == "__main__":
    timestamp = '2017-12-02 00:30:00'
    session = connection()
    # ALLOW FILTERING is part of the query text sent to the database.
    query = "select * from meurouth_cql.database_time where date = '%s' ALLOW FILTERING" % (timestamp)
    data = session.execute_async(query)
    createMap(data)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment