"""question3.py -- 3-cluster k-means on per-station means of one indicator.

The script reads rows from ``caitiany.database_kmeans`` for a date range,
averages the chosen indicator per station with small map/reduce helpers,
groups the stations into three clusters on that single averaged value, and
writes a folium map with one marker colour per cluster.
"""
import random
import re
from functools import reduce

import folium
import matplotlib.pyplot as plt
import numpy as np

from database_pre3 import connection

# Column names used to translate an indicator name into a row index.
# NOTE(review): rows are indexed positionally below, so this presumably
# mirrors the column order of `select *` on the table -- confirm.
table_variable = [
    'date',
    'lon',
    'lat',
    'station',
    'alti',
    'drct',
    'dwpf',
    'feel',
    'gust',
    'ice_accretion_1hr',
    'ice_accretion_3hr',
    'ice_accretion_6hr',
    'metar',
    'mslp',
    'p01i',
    'peak_wind_drct',
    'peak_wind_gust',
    'peak_wind_time',
    'relh',
    'sknt',
    'skyc1',
    'skyc2',
    'skyc3',
    'skyc4',
    'skyl1',
    'skyl2',
    'skyl3',
    'skyl4',
    'tmpf',
    'vsby',
    'wxcodes']


def add(x, y):
    """Return x + y."""
    return x + y


def abs_diff(x, y):
    """Return the absolute difference |x - y|."""
    return abs(x - y)


def diff(x, y):
    """Return the signed difference x - y."""
    return x - y


def reduceFonction(x, y):
    """Merge two [count, sum] accumulators element-wise.

    Only the first two slots of each argument are read.
    """
    return [add(x[0], y[0]), add(x[1], y[1])]


def mapFonction1(x):
    """Wrap a single value as a [count, sum] accumulator: value -> [1, value]."""
    return [1, x]


def mapFonction2(x):
    """Turn a [count, sum] accumulator into its mean (sum / count)."""
    return x[1] / x[0]


def mapReduce_kmeans(targetNB, start, end):
    """Compute the mean of one indicator per station over a date range.

    Args:
        targetNB: positional index of the indicator inside each row.
        start: inclusive lower bound for the ``date`` column.
        end: inclusive upper bound for the ``date`` column.

    Returns:
        dict mapping ``(row[1], row[2], row[3])`` to the mean of
        ``row[targetNB]`` over the rows where that value is not None
        (presumably (lon, lat, station), going by ``table_variable``).

    Relies on the module-level ``session`` created in the ``__main__`` block.
    """
    # Bound parameters instead of %-formatting raw user input into the CQL
    # text, so a quote inside start/end cannot alter the statement.
    # NOTE(review): assumes `session` is a cassandra-driver Session, whose
    # execute() accepts %s placeholders plus a parameter tuple -- confirm.
    query = ("select * from caitiany.database_kmeans "
             "where date >= %s and date <= %s ALLOW FILTERING")
    results = dict()
    for row in session.execute(query, (start, end)):
        data_target = row[targetNB]
        if data_target is None:
            continue
        data_espace = (row[1], row[2], row[3])
        mapped = mapFonction1(data_target)
        if results.get(data_espace) is None:
            results[data_espace] = mapped
        else:
            results[data_espace] = reduceFonction(mapped, results[data_espace])
    for each_espace in results:
        results[each_espace] = mapFonction2(results[each_espace])
    return results


def cluster_nb_diff(centre_new, centre):
    """Return the mean absolute change in point count over the 3 clusters.

    Both arguments map cluster number (0..2) to ``[count, value]``.
    """
    total = 0
    for i in range(3):
        total += abs(centre_new[i][0] - centre[i][0])
    return total / 3


def map1_kmeans(x):
    """Replicate a value four times: x -> [x, x, x, x]."""
    return [x, x, x, x]


def mapCentre(x):
    """Pad three centre values with a trailing 0: -> [c1, c2, c3, 0]."""
    return [x[0], x[1], x[2], 0]


def reduceKmeans(x, y):
    """Combine a replicated value with the padded centres.

    Input ``[v, v, v, v]`` and ``[c1, c2, c3, 0]``; output
    ``[|v - c1|, |v - c2|, |v - c3|, v]``.

    The last slot carries the value through unchanged. Taking
    ``abs_diff`` against the padding 0 there would strip the sign of
    negative values before they are summed into the new centres.
    """
    result = [abs_diff(x[i], y[i]) for i in range(3)]
    result.append(x[3])
    return result


def map2_kmeans(x):
    """Pick the nearest centre.

    Input ``[d1, d2, d3, v]``; output ``[cluster number, min distance, v]``.
    Ties go to the lowest cluster number.
    """
    index = min(range(3), key=lambda k: x[k])
    return [index, x[index], x[3]]


def MapnewCentre(x):
    """Return the mean of a ``[count, sum]`` pair, or 0 for an empty cluster."""
    if x[0] != 0:
        return x[1] / x[0]
    return 0


def kmeans(targetNB, target, start, end):
    """Cluster the stations into 3 groups and draw the resulting map.

    Args:
        targetNB: positional index of the indicator inside each row.
        target: indicator name, used in the marker labels.
        start: inclusive lower bound for the ``date`` column.
        end: inclusive upper bound for the ``date`` column.

    Raises:
        Exception: if fewer than 3 stations have data in the range.

    Iteration stops once the mean change in cluster sizes between two
    passes is no greater than 1.
    """
    result = mapReduce_kmeans(targetNB, start, end)

    if len(result) < 3:
        raise Exception("We\'ve just searched less than 3 station!!")

    # Three *distinct* random stations as initial centres. Independent
    # random.choice calls could return the same station more than once.
    seeds = random.sample(list(result), 3)

    # Per cluster: [point count, centre value]. The seed itself counts as
    # the first member of its cluster during the first pass.
    centre = {k: [1, result[seeds[k]]] for k in range(3)}
    centre_new = {k: [1, result[seeds[k]]] for k in range(3)}
    # cluster[k] lists the station keys assigned to cluster k.
    cluster = [[seeds[0]], [seeds[1]], [seeds[2]]]
    # Seeds are pre-assigned in the first pass only.
    already_placed = set(seeds)

    while True:
        # The centres do not change while the points are being assigned,
        # so build the padded centre vector once per pass.
        centre_values = mapCentre([centre[each][1] for each in centre])

        # Each entry becomes [cluster number, min distance, value].
        result_new = dict()
        for eachkey in result:
            if eachkey in already_placed:
                continue
            replicated = map1_kmeans(result[eachkey])
            distances = reduceKmeans(replicated, centre_values)
            result_new[eachkey] = map2_kmeans(distances)

        # Accumulate counts and sums per cluster and record membership.
        for eachpoint in result_new:
            clusterNB = result_new[eachpoint][0]
            centre_new[clusterNB][0] += 1
            centre_new[clusterNB][1] += result_new[eachpoint][2]
            cluster[clusterNB].append(eachpoint)

        size_change = cluster_nb_diff(centre_new, centre)
        if not size_change > 1:
            break

        print("Cluster number differ: ", size_change)
        # Turn the accumulated sums into means: these are the new centres.
        for eachcluster in centre_new:
            centre_new[eachcluster][1] = MapnewCentre(centre_new[eachcluster])
        print("center new: ", centre_new)
        print("center old: ", centre)
        centre = centre_new
        centre_new = {0: [0, 0], 1: [0, 0], 2: [0, 0]}
        cluster = [[], [], []]
        already_placed = set()

    createMap(cluster, result, target)


def createMap(cluster, result, target=None):
    """Write ``Projet-NF26/map.html`` with one marker per clustered station.

    Args:
        cluster: three lists of station keys; each key is indexed as
            ``key[0]``, ``key[1]``, ``key[2]``.
        result: dict mapping station key to the value shown in the popup.
        target: indicator name for the popup label. When omitted, the
            module-level ``target`` is used if it exists (script mode).

    Raises:
        ValueError: if the clusters contain no station at all.
    """
    if target is None:
        target = globals().get("target", "")

    groups = [cluster[0], cluster[1], cluster[2]]
    positions = [each_pos for each in groups for each_pos in each]
    if not positions:
        raise ValueError("createMap needs at least one clustered station")

    # NOTE(review): key[0] is presumably longitude and key[1] latitude,
    # going by table_variable -- confirm. folium expects [lat, lon].
    mean_first = sum(each_pos[0] for each_pos in positions) / len(positions)
    mean_second = sum(each_pos[1] for each_pos in positions) / len(positions)

    m = folium.Map(location=[mean_second, mean_first], zoom_start=6)
    color = {0: 'blue', 1: 'red', 2: 'green'}
    for i, each in enumerate(groups):
        for each_pos in each:
            value = result[(each_pos[0], each_pos[1], each_pos[2])]
            label = (str(each_pos[2]) + "\n" + str(target) + ":"
                     + str(round(value, 2)))
            folium.Marker([each_pos[1], each_pos[0]],
                          popup=label,
                          icon=folium.Icon(color=color[i])).add_to(m)
    m.save("Projet-NF26/map.html")
    print("Generate successfully")


def checkNBvariable(x):
    """Return the index of indicator ``x`` in ``table_variable``.

    Prints a message and returns None when the name is unknown.
    """
    try:
        return table_variable.index(x)
    except ValueError:
        print('Doesn\'t exist!!')
        return None


if __name__ == "__main__":
    session = connection()
    start = input("Please enter the start time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
    end = input("Please enter the end time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
    target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ")
    targetNB = checkNBvariable(target)
    # An unknown indicator yields None; do not use it as a row index.
    if targetNB is not None:
        kmeans(targetNB, target, start, end)