# question3.py (7.09 KB)
 Tianyang committed Jun 15, 2019 1 2 3 4 5 6 7 ``````import numpy as np import matplotlib.pyplot as plt from functools import reduce from database_pre3 import connection import matplotlib.pyplot as plt import re import folium `````` Tianyang committed Jun 20, 2019 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 ``````import random table_variable = [ 'date', 'lon', 'lat', 'station', 'alti', 'drct', 'dwpf', 'feel', 'gust', 'ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr', 'metar', 'mslp', 'p01i', 'peak_wind_drct', 'peak_wind_gust', 'peak_wind_time', 'relh', 'sknt', 'skyc1', 'skyc2', 'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4', 'tmpf', 'vsby', 'wxcodes'] `````` Tianyang committed Jun 15, 2019 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 `````` def add (x,y): return x+y def abs_diff(x,y): return abs(x-y) def diff(x,y): return x-y #caculate mean reduce #input [count,mean] def reduceFonction (x,y): result = [] for i in range(2): result.append(reduce(add,[x[i],y[i]])) return result #input [valeur] -> [count,mean] def mapFonction1 (x): return [1,x] #input [count,mean] -> [mean] def mapFonction2 (x): return x[1]/x[0] def testNan (x): test = x != x return test def mapReduce_kmeans(data,targetNB): results = dict() for row in data.result(): data_target = row[targetNB] if testNan(data_target): continue data_espace = (row[1],row[2],row[3]) if results.get(data_espace) is None: results[data_espace] = mapFonction1(data_target) else: mapresult = mapFonction1(data_target) results[data_espace] = reduceFonction(mapresult,results[data_espace]) for eachEspace in results: results[eachEspace] = mapFonction2(results[eachEspace]) return results 
def cluster_nb_diff(centre_new, centre):
    """Return the mean absolute change in point count over the 3 clusters.

    Both arguments map cluster index (0, 1, 2) to a sequence whose first
    element is that cluster's point count.
    """
    return sum(abs(centre_new[i][0] - centre[i][0]) for i in range(3)) / 3


def map1_kmeans(x):
    """Replicate a value four times: [x, x, x, x]."""
    return [x, x, x, x]


def mapCentre(x):
    """Return [c1, c2, c3, 0] from the first three elements of x."""
    return [x[0], x[1], x[2], 0]


def reduceKmeans(x, y):
    """Return the element-wise absolute difference of two 4-element sequences.

    With x = [v, v, v, v] and y = [c1, c2, c3, 0] this yields
    [|v - c1|, |v - c2|, |v - c3|, |v|].
    """
    return [abs(x[i] - y[i]) for i in range(4)]


def map2_kmeans(x):
    """Pick the nearest of three centres.

    x is [d1, d2, d3, value]. Returns [index, distance, value] where index is
    the position (0-2) of the smallest distance; ties go to the lowest index.
    """
    index = min(range(3), key=lambda each: x[each])
    return [index, x[index], x[3]]


def MapnewCentre(x):
    """Return sum / count for a [count, sum] pair, or 0 for an empty cluster."""
    if x[0] != 0:
        return x[1] / x[0]
    return 0


def kmeans(data, targetNB, target):
    """Cluster stations into 3 groups by their mean target value and map them.

    Parameters:
        data: query result handed to mapReduce_kmeans().
        targetNB: index of the column to cluster on.
        target: name of that column. It is accepted for the caller's
            convenience but not used here: createMap() is invoked with two
            arguments only.

    Raises:
        Exception: when fewer than 3 stations are available.

    The loop stops once the per-cluster point counts change by at most 1 on
    average between two passes; the final clusters are passed to createMap().
    """
    result = mapReduce_kmeans(data, targetNB)
    if len(result) < 3:
        raise Exception("We've just searched less than 3 station!!")

    # Draw three *distinct* stations as initial centres. Independent
    # random.choice() calls could return the same station more than once,
    # which produced identical centres and duplicated cluster members.
    init_point_keys = random.sample(list(result), 3)
    init_point_values = [result[key] for key in init_point_keys]

    # Each centre is [point count, value]; centre_new accumulates
    # [point count, sum of values] during a pass.
    centre = {}
    centre_new = {}
    # cluster[i] holds the station keys currently assigned to cluster i.
    cluster = [[], [], []]
    for key in range(3):
        centre[key] = [1, init_point_values[key]]
        centre_new[key] = [1, init_point_values[key]]
        cluster[key].append(init_point_keys[key])

    while True:
        result_new = dict()
        # The centres are fixed for the whole pass, so build them once.
        centre_values = mapCentre([centre[each][1] for each in centre])
        # Only non-empty on the first pass, where the seeds are pre-assigned.
        already_assigned = set(cluster[0]) | set(cluster[1]) | set(cluster[2])

        for eachkey in result:
            if eachkey in already_assigned:
                continue
            distances = reduceKmeans(map1_kmeans(result[eachkey]), centre_values)
            # [cluster number, distance to that centre, value]
            result_new[eachkey] = map2_kmeans(distances)

        for eachpoint in result_new:
            clusterNB = result_new[eachpoint][0]
            centre_new[clusterNB][0] += 1
            centre_new[clusterNB][1] += result_new[eachpoint][2]
            cluster[clusterNB].append(eachpoint)

        if not cluster_nb_diff(centre_new, centre) > 1:
            break

        print("Cluster number differ: ", cluster_nb_diff(centre_new, centre))
        print(centre_new)
        # Replace each accumulated sum by the cluster mean.
        for eachculster in centre_new:
            centre_new[eachculster][1] = MapnewCentre(centre_new[eachculster])
        print("center new: ", centre_new)
        print("center old: ", centre)
        centre = centre_new
        centre_new = {0: [0, 0], 1: [0, 0], 2: [0, 0]}
        cluster = [[], [], []]

    createMap(cluster, result)
def createMap(cluster, result, target=None):
    """Draw every clustered station on a folium map and save it as HTML.

    Parameters:
        cluster: sequence of at least three lists of station keys; each key
            is a 3-tuple whose first two items are used as coordinates and
            whose third item is used as the marker title.
            NOTE(review): the keys are presumably (lon, lat, station) --
            confirm against the code that builds them.
        result: dict mapping each station key to its numeric value.
        target: indicator name shown in the popup. When omitted, the
            module-level ``target`` set by the script entry point is used
            (empty string if it does not exist).

    Raises:
        ValueError: when the clusters contain no station at all.

    Writes "Projet-NF26/map.html" relative to the working directory.
    """
    if target is None:
        target = globals().get("target", "")

    groups = [cluster[0], cluster[1], cluster[2]]
    points = [each_pos for each in groups for each_pos in each]
    if not points:
        raise ValueError("createMap needs at least one clustered station")

    # Item 1 is given to folium as latitude and item 0 as longitude.
    mean_lon = sum(each_pos[0] for each_pos in points) / len(points)
    mean_lat = sum(each_pos[1] for each_pos in points) / len(points)

    m = folium.Map(location=[mean_lat, mean_lon], zoom_start=6)
    color = {0: 'blue', 1: 'red', 2: 'green'}
    for i, each in enumerate(groups):
        for each_pos in each:
            value = result[(each_pos[0], each_pos[1], each_pos[2])]
            label = str(each_pos[2]) + "\n" + "" + target + ":" + str(round(value, 2))
            folium.Marker([each_pos[1], each_pos[0]],
                          popup=label,
                          icon=folium.Icon(color=color[i])).add_to(m)
        # Progress output: number of clusters drawn so far.
        print(i + 1)
    m.save("Projet-NF26/map.html")
    print("Generate successfully")


def checkNBvariable(x, variables=None):
    """Return the position of name x in the list of column names.

    Parameters:
        x: column name to look up.
        variables: sequence of names to search; defaults to the module-level
            ``table_variable``.

    Returns:
        The index of the first match, or None (after printing a message)
        when x is not present.
    """
    names = table_variable if variables is None else variables
    for i, each in enumerate(names):
        if x == each:
            return i
    print('Doesn\'t exist!!')
    return None


if __name__ == "__main__":
    session = connection()
    start = input("Please enter the start time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
    end = input("Please enter the end time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: ")
    # `target` must stay a module-level name: createMap() falls back to it.
    target = input("Which indicator do you want to check [tmpf,dwpf,etc]: ")
    targetNB = checkNBvariable(target)
    if targetNB is None:
        # Without a valid column index every row lookup would fail later.
        raise SystemExit("Unknown indicator: %r" % target)

    # The dates are interpolated into the query text, so accept nothing but
    # a plain date.
    for value in (start, end):
        if re.fullmatch(r"\d{4}-\d{1,2}-\d{1,2}", value) is None:
            raise SystemExit("Invalid date %r, expected AAAA-MM-DD" % value)

    data = session.execute_async("select * from caitiany.database_kmeans where date >= '%s' and date <= '%s' ALLOW FILTERING"%(start,end))
    kmeans(data, targetNB, target)