question2.py 5.81 KB
Newer Older
Unknown's avatar
Unknown committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
from database_pre2 import connection
import matplotlib.pyplot as plt
import re
import folium

def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total


def abs_diff(x, y):
    """Return the absolute value of ``x - y``."""
    delta = x - y
    return abs(delta)

def diff(x, y):
    """Return the signed difference ``x - y``."""
    delta = x - y
    return delta



# Reduce step of the mean computation: element-wise sum of two partial results
# input: [count, sum] and [count, sum] -> [count, sum]
def reduceFonction(x, y):
    """Merge two partial aggregates by adding them slot by slot.

    Each argument is a two-slot sequence. In this file the slots are
    ``[count, sum]``: ``mapFonction1`` emits ``[1, value]`` and
    ``mapFonction2`` later divides slot 1 by slot 0 to get the mean.

    Args:
        x: first partial aggregate; only ``x[0]`` and ``x[1]`` are read.
        y: second partial aggregate; only ``y[0]`` and ``y[1]`` are read.

    Returns:
        A new two-element list ``[x[0] + y[0], x[1] + y[1]]``.
    """
    # Plain addition replaces reduce(add, [a, b]) over a two-element list.
    return [x[0] + y[0], x[1] + y[1]]

# input: value -> [count, sum] with count = 1
def mapFonction1(x):
    """Wrap a single value as a two-slot list whose first slot is the count 1."""
    count = 1
    return [count, x]

# input: [count, sum] -> mean
def mapFonction2(x):
    """Divide the second slot of ``x`` by its first slot and return the quotient."""
    numerator = x[1]
    denominator = x[0]
    return numerator / denominator


def testNan (x):
    test = x != x
    return test


def mapReduce_kmeans(data, targetNB):
    """Compute the mean of one column per ``(row[1], row[2], row[3])`` key.

    Args:
        data: object whose ``result()`` yields indexable rows.
        targetNB: index of the column to average.

    Returns:
        A dict mapping each ``(row[1], row[2], row[3])`` tuple to the mean of
        ``row[targetNB]`` over the rows sharing that key. Rows whose target
        value is NaN (per ``testNan``) are ignored.
    """
    partials = {}
    for row in data.result():
        value = row[targetNB]
        if not testNan(value):
            key = (row[1], row[2], row[3])
            mapped = mapFonction1(value)
            previous = partials.get(key)
            if previous is None:
                # First value seen for this key.
                partials[key] = mapped
            else:
                # Fold the new value into the running aggregate.
                partials[key] = reduceFonction(mapped, previous)

    # Final map step: turn every aggregate into its mean.
    return {key: mapFonction2(aggregate) for key, aggregate in partials.items()}


def cluster_nb_diff(centre_new, centre):
    """Return the mean absolute change of slot 0 over entries 0, 1 and 2.

    Both arguments are indexed with ``0``, ``1`` and ``2``; each entry is a
    sequence whose first slot is compared. In ``kmeans`` that slot holds the
    number of points of a cluster.
    """
    total_change = sum(
        abs(centre_new[idx][0] - centre[idx][0]) for idx in (0, 1, 2)
    )
    return total_change / 3



#input [tmpt] -> [tmpt,tmpt,tmpt,tmpt]
def map1_kmeans(x):
    """Return a four-element list that repeats ``x`` in every slot."""
    return [x] * 4

def mapCentre(x):
    """Return the first three entries of ``x`` followed by a trailing ``0``."""
    first = x[0]
    second = x[1]
    third = x[2]
    return [first, second, third, 0]

#input [tmpt,tmpt,tmpt,tmpt] and [c1,c2,c3,0] -> [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt]


def reduceKmeans(x, y):
    """Compute the distances from a value to three centres and carry the value along.

    Intended inputs are ``[v, v, v, v]`` (from ``map1_kmeans``) and
    ``[c1, c2, c3, 0]`` (from ``mapCentre``).

    Args:
        x: four-slot sequence holding the value to classify.
        y: four-slot sequence holding the three centres and a trailing 0.

    Returns:
        ``[|x0 - y0|, |x1 - y1|, |x2 - y2|, x3 - y3]``. With the intended
        inputs the last slot is the untouched value ``v``.
    """
    # Slots 0-2 are distances, so the sign is irrelevant.
    result = [abs(x[i] - y[i]) for i in range(3)]
    # Slot 3 carries the value itself. The original took abs() here too, which
    # turned negative values positive and corrupted the centre sums later on.
    result.append(x[3] - y[3])
    return result


#input [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt] -> [cluster number, min(|tmpt - c|), tmpt]
def map2_kmeans(x):
    """Pick the nearest of three centres.

    Args:
        x: four-slot sequence; slots 0-2 are distances to the three centres
            and slot 3 is passed through unchanged.

    Returns:
        ``[index, min_value, x[3]]`` where ``index`` is the position (0-2) of
        the smallest distance and ``min_value`` is that distance. On ties the
        lowest index wins.
    """
    # Start from +inf rather than a large magic number, so the real minimum is
    # always found however large the distances are.
    min_value = float("inf")
    index = 0
    for candidate in range(3):
        distance = x[candidate]
        # Strict comparison keeps the first of several equal minima.
        if distance < min_value:
            min_value = distance
            index = candidate
    return [index, min_value, x[3]]




def MapnewCentre(x):
    """Return ``x[1] / x[0]``; ``kmeans`` passes ``[point count, value sum]``."""
    accumulated = x[1]
    point_count = x[0]
    return accumulated / point_count



def kmeans(data, targetNB):
    """Split the locations into three clusters by their mean target value.

    The per-location means come from ``mapReduce_kmeans``. The first three
    locations seed the three centres, then points are repeatedly assigned to
    the nearest centre and the centres are recomputed until the cluster sizes
    stabilise.

    Args:
        data: query result handle, forwarded to ``mapReduce_kmeans`` (which
            calls ``data.result()``).
        targetNB: index of the row column to cluster on.

    Returns:
        A list of three lists; list ``k`` holds the ``(row[1], row[2], row[3])``
        keys assigned to cluster ``k``. All three lists are empty when fewer
        than three locations have data.
    """
    # Mean target value per location key.
    result = mapReduce_kmeans(data, targetNB)

    # cluster[k] stores the location keys of every point in cluster k.
    cluster = [[], [], []]

    # Three seeds are needed; indexing the seed lists used to raise IndexError.
    if len(result) < 3:
        print('Not enough data to build 3 clusters !')
        return cluster

    # Use the first three points as the initial centres.
    # Each centre is [point count, centre value].
    init_point_keys = list(result)[:3]
    centre = {}
    for key, point in enumerate(init_point_keys):
        centre[key] = [1, result[point]]
        cluster[key].append(point)

    # Accumulators for the next pass: [point count, value sum] per cluster.
    centre_new = {0: [0, 0], 1: [0, 0], 2: [0, 0]}
    result_new = dict()

    while True:
        # The centres only change between passes, so build [c1, c2, c3, 0]
        # once per pass instead of once per point.
        centre_values = mapCentre([centre[each][1] for each in centre])

        for eachkey in result:
            # On the first pass the seeds already sit in their own cluster;
            # afterwards the cluster lists are empty here and nothing is skipped.
            if any(eachkey in members for members in cluster):
                continue

            # Map1 -> Reduce -> Map2 gives [cluster number, distance, value].
            distances = reduceKmeans(map1_kmeans(result[eachkey]), centre_values)
            result_new[eachkey] = map2_kmeans(distances)

        # Put every point into its cluster and accumulate count and value sum.
        for eachpoint in result_new:
            clusterNB = result_new[eachpoint][0]
            centre_new[clusterNB][0] += 1
            centre_new[clusterNB][1] += result_new[eachpoint][2]
            cluster[clusterNB].append(eachpoint)

        # Stop when the cluster sizes changed by at most one point on average.
        size_change = cluster_nb_diff(centre_new, centre)
        if not size_change > 1:
            break

        # Progress trace kept from the original implementation.
        print ('jasdlkjalsdkjalskd         ',size_change)

        # Compute the new centres.
        for eachcluster in centre_new:
            if centre_new[eachcluster][0] == 0:
                # Empty cluster: keep its previous centre instead of dividing
                # by zero in MapnewCentre.
                centre_new[eachcluster][1] = centre[eachcluster][1]
            else:
                centre_new[eachcluster][1] = MapnewCentre(centre_new[eachcluster])
        centre = centre_new
        centre_new = {0: [0, 0], 1: [0, 0], 2: [0, 0]}
        result_new = dict()
        cluster = [[], [], []]

    # createMap() calls data.result() on its argument, so passing it these plain
    # lists always raised AttributeError. Return the clusters to the caller.
    return cluster


def createMap(data):
    """Draw one marker per result row on a folium map and save it as HTML.

    The map is centred on the mean of the row coordinates and written to
    ``Projet-NF26/map.html``. When the result holds no rows, a message is
    printed and nothing is saved.

    Args:
        data: object whose ``result()`` yields indexable rows. ``row[1]`` and
            ``row[2]`` are read as coordinates, and ``row[4]`` onwards as the
            values of the names listed in ``attributes``, in that order.
    """
    # Fetch the rows once. The original called data.result() twice and iterated
    # both results, which depends on the result being re-iterable.
    rows = list(data.result())
    if not rows:
        print('No data available at this timestamp !')
        return

    count = len(rows)
    # Map and markers both use [row[2], row[1]] as the location.
    # NOTE(review): folium expects [latitude, longitude], so row[2] is
    # presumably the latitude - confirm against the table schema.
    centre_location = [
        sum(row[2] for row in rows) / count,
        sum(row[1] for row in rows) / count,
    ]
    m = folium.Map(location=centre_location, zoom_start=6)

    attributes = ["alti", "drct", "dwpf", "feel", "gust", "ice_accretion_1hr", "ice_accretion_3hr", "ice_accretion_6hr",
                  "metar", "mslp", "p01i", "peak_wind_drct", "peak_wind_gust", "peak_wind_time", "relh", "sknt",
                  "skyc1", "skyc2", "skyc3", "skyc4", "skyl1", "skyl2", "skyl3", "skyl4", "tmpf", "vsby", "wxcodes"]
    for row in rows:
        # Popup text: one "name:value" line per attribute, skipping NaN values
        # and the "metar" column.
        lines = [
            name + ":" + str(row[i + 4])
            for i, name in enumerate(attributes)
            if str(row[i + 4]) != 'nan' and name != "metar"
        ]
        folium.Marker([row[2], row[1]],
                      popup='\n'.join(lines),
                      icon=folium.Icon(color='red')).add_to(m)
    m.save("Projet-NF26/map.html")



if __name__ == "__main__":
    # Fetch every row recorded at one fixed timestamp and plot it on the map.
    timestamp = '2017-12-02 00:30:00'
    query = "select * from meurouth_cql.database_time where date = '%s' ALLOW FILTERING" % timestamp
    session = connection()
    data = session.execute_async(query)
    createMap(data)