# question3.py 7.3 KB
 Tianyang committed Jun 15, 2019 1 2 3 4 5 6 7 ``````import numpy as np import matplotlib.pyplot as plt from functools import reduce from database_pre3 import connection import matplotlib.pyplot as plt import re import folium `````` Tianyang committed Jun 20, 2019 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 ``````import random table_variable = [ 'date', 'lon', 'lat', 'station', 'alti', 'drct', 'dwpf', 'feel', 'gust', 'ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr', 'metar', 'mslp', 'p01i', 'peak_wind_drct', 'peak_wind_gust', 'peak_wind_time', 'relh', 'sknt', 'skyc1', 'skyc2', 'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4', 'tmpf', 'vsby', 'wxcodes'] `````` Tianyang committed Jun 15, 2019 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 `````` def add (x,y): return x+y def abs_diff(x,y): return abs(x-y) def diff(x,y): return x-y #caculate mean reduce #input [count,mean] def reduceFonction (x,y): result = [] for i in range(2): result.append(reduce(add,[x[i],y[i]])) return result #input [valeur] -> [count,mean] def mapFonction1 (x): return [1,x] #input [count,mean] -> [mean] def mapFonction2 (x): return x[1]/x[0] `````` Tianyang committed Jun 21, 2019 75 ``````#Test if it is a type mean `````` Tianyang committed Jun 15, 2019 76 77 78 79 ``````def testNan (x): test = x != x return test `````` Tianyang committed Jun 21, 2019 80 ``````#Map reduce to caculate the means of each station `````` Tianyang committed Jun 15, 2019 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 ``````def mapReduce_kmeans(data,targetNB): results = dict() for row in data.result(): data_target = row[targetNB] if testNan(data_target): continue data_espace = (row[1],row[2],row[3]) if results.get(data_espace) is None: results[data_espace] = mapFonction1(data_target) else: mapresult = mapFonction1(data_target) results[data_espace] = reduceFonction(mapresult,results[data_espace]) 
for eachEspace in results: results[eachEspace] = mapFonction2(results[eachEspace]) return results `````` Tianyang committed Jun 21, 2019 97 ``````#Caculate the difference of the number of the clusters `````` Tianyang committed Jun 15, 2019 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 ``````def cluster_nb_diff(centre_new,centre): sum = 0 for i in range(3): sum += abs(centre_new[i][0]-centre[i][0]) return sum/3 #input [tmpt] -> [tmpt,tmpt,tmpt,tmpt] def map1_kmeans(x): return [x,x,x,x] def mapCentre(x): return [x[0],x[1],x[2],0] `````` Tianyang committed Jun 21, 2019 114 ``````#input [tmpt,tmpt,tmpt,tmpt] and [c1,c2,c3,0] -> [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt] `````` Tianyang committed Jun 15, 2019 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 ``````def reduceKmeans (x,y): result = [] for i in range(4): result.append(reduce(abs_diff,[x[i],y[i]])) return result #input [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt] -> [cluster number, min(|tmpt - c|), tmpt] def map2_kmeans(x): min_value = 10000000000000 index = 0 for each in range(3): if min_value > x[each]: min_value = x[each] index = each return [index,min_value,x[3]] `````` Tianyang committed Jun 21, 2019 134 ``````#Update the new center by means `````` Tianyang committed Jun 15, 2019 135 ``````def MapnewCentre(x): `````` Tianyang committed Jun 20, 2019 136 137 138 139 `````` if x[0] != 0: return x[1]/x[0] else: return 0 `````` Tianyang committed Jun 15, 2019 140 141 `````` `````` Tianyang committed Jun 21, 2019 142 ``````#The main algorithm of Kmeans `````` Tianyang committed Jun 20, 2019 143 ``````def kmeans (data,targetNB,target): `````` Tianyang committed Jun 15, 2019 144 145 146 147 148 `````` #cluster est pour stocler lat, lon de chaque point de chaque cluster cluster = [[],[],[]] result = mapReduce_kmeans(data,targetNB) `````` Tianyang committed Jun 20, 2019 149 150 151 152 153 154 155 156 157 158 159 160 161 162 `````` if len(result) < 3: raise Exception 
("We\'ve just searched less than 3 station!!") #mettre ramdom 3 point comme le centres init center1 = random.choice(list(result)) center2 = random.choice(list(result)) center3 = random.choice(list(result)) init_point_values = [result[center1],result[center2],result[center3]] #init_point_values = [result[i] for i in result.keys()][:3] init_point_keys = [center1,center2,center3] #3centre with [point count, temprature centre] centre = {0:[0,0],1:[0,0],2:[0,0]} centre_new = {0:[0,0],1:[0,0],2:[0,0]} `````` Tianyang committed Jun 15, 2019 163 164 165 `````` for key in centre.keys(): centre[key] = [1,init_point_values[key]] `````` Tianyang committed Jun 20, 2019 166 `````` centre_new[key] = [1,init_point_values[key]] `````` Tianyang committed Jun 15, 2019 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 `````` cluster[key].append(init_point_keys[key]) #init the centre new and result new for mapreduce result_new = dict() #When the number of point of cluster don't change,stop while True: for eachkey in result: if eachkey in cluster[0] or eachkey in cluster[1] or eachkey in cluster[2]: continue #caculate the distance between the data of this lingne and the centre #Map1_kemeans result_new[eachkey] = map1_kmeans(result[eachkey]) centre_values = [] for each in centre: centre_values.append(centre[each][1]) centre_values = mapCentre(centre_values) #Reduce result_new[eachkey] = reduceKmeans(result_new[eachkey],centre_values) #Map2_kmeans result_new[eachkey] = map2_kmeans(result_new[eachkey]) #Put all the distance and points into the clusters #Result format [cluster number, min(|tmpt - c|),tmpt - c] for eachpoint in result_new: clusterNB = result_new[eachpoint][0] centre_new[clusterNB][0] += 1 centre_new[clusterNB][1] += result_new[eachpoint][2] cluster[clusterNB].append(eachpoint) #compare centre_new and centre, if if not cluster_nb_diff(centre_new,centre) > 1: break else: 
#caculate the new centre `````` Tianyang committed Jun 20, 2019 206 `````` print ("Cluster number differ: ",cluster_nb_diff(centre_new,centre)) `````` Tianyang committed Jun 15, 2019 207 208 `````` for eachculster in centre_new: centre_new[eachculster][1] = MapnewCentre(centre_new[eachculster]) `````` Tianyang committed Jun 20, 2019 209 210 `````` print("center new: ",centre_new) print("center old: ",centre) `````` Tianyang committed Jun 15, 2019 211 212 213 214 `````` centre = centre_new centre_new = {0:[0,0],1:[0,0],2:[0,0]} result_new = dict() cluster = [[],[],[]] `````` Tianyang committed Jun 20, 2019 215 `````` createMap(cluster,result) `````` Tianyang committed Jun 15, 2019 216 `````` `````` Tianyang committed Jun 21, 2019 217 ``````#Create the map of the cluster `````` Tianyang committed Jun 20, 2019 218 ``````def createMap (cluster,result): `````` Tianyang committed Jun 15, 2019 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 `````` mean_lat = 0 count = 0 for each in [cluster[0],cluster[1],cluster[2]]: for each_pos in each: mean_lat += each_pos[0] count += 1 mean_lat = mean_lat/count mean_lon = 0 count = 0 for each in [cluster[0],cluster[1],cluster[2]]: for each_pos in each: mean_lon += each_pos[1] count += 1 mean_lon = mean_lon/count m = folium.Map(location=[mean_lon,mean_lat],zoom_start=6) color = {0:'blue',1:'red',2:'green'} i = 0 for each in [cluster[0],cluster[1],cluster[2]]: for each_pos in each: `````` Tianyang committed Jun 20, 2019 241 `````` label = str(each_pos[2])+"\n"+""+target+":"+str(round(result[(each_pos[0],each_pos[1],each_pos[2])],2)) `````` Tianyang committed Jun 15, 2019 242 `````` folium.Marker([each_pos[1],each_pos[0]], `````` Tianyang committed Jun 20, 2019 243 `````` popup=label, `````` Tianyang committed Jun 15, 2019 244 245 246 `````` icon=folium.Icon(color=color[i])).add_to(m) i +=1 m.save("Projet-NF26/map.html") `````` Tianyang committed Jun 20, 2019 247 248 249 `````` print("Generate 
successfully") `````` Tianyang committed Jun 21, 2019 250 ``````#Check which number of the indicateur `````` Tianyang committed Jun 20, 2019 251 252 253 254 255 256 257 ``````def checkNBvariable (x): i=0 for each in table_variable: if x == each: return i i += 1 print ('Doesn\'t exist!!') `````` Tianyang committed Jun 15, 2019 258 259 260 261 `````` if __name__ == "__main__": session = connection() `````` Tianyang committed Jun 20, 2019 262 263 264 265 266 267 `````` start = input("Please enter the start time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ") end = input("Please enter the end time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ") target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ") targetNB = checkNBvariable(target) #start = '2008-12-19' #end = '2012-12-14' `````` Tianyang committed Jun 15, 2019 268 `````` data = session.execute_async("select * from caitiany.database_kmeans where date >= '%s' and date <= '%s' ALLOW FILTERING"%(start,end)) `````` Tianyang committed Jun 20, 2019 269 `` kmeans(data,targetNB,target)``