Commit 51e0479e by sim-baz

### K-means working: returns each station with its cluster label

parent 67ccb854
 from cassandra.cluster import Cluster from datetime import datetime from sklearn.cluster import KMeans import numpy as np import loading as l import history as h ... ... @@ -53,10 +56,41 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica deciles[station].append({indicators_list[i] : []}) # Compute deciles, from 0 to 10 (= includes min and max) for d in range(11): deciles[station][i][indicators_list[i]].append(l[t[3]][i][indicators_list[i]][len(l[t[3]][i][indicators_list[i]]) // 10 * d]) if d == 10: deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) - 1]) else: deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) // 10 * d]) return deciles def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod): # Create table without map table = [] # Create list with stations name stations_name = [] for station in deciles.keys(): t = [] stations_name.append(station) for i in range(nb_indicators): t += deciles[station][i][indicators_list[i]] print (t) table.append(t) if len(stations_name) < nb_clusters: print(f"Le nombre de villes ayant des données est trop inférieur ({len(stations_name)}) pour appliquer les kmeans pour la période du {startPeriod} au {endPeriod}") return None kmeans = KMeans(n_clusters = 3, max_iter = 100).fit(table) res = {} i = 0 for station in stations_name: res[station] = kmeans.labels_[i] i += 1 return res def kmeans(startPeriod, endPeriod, indicators_list): startDate = datetime.strptime(startPeriod, "%Y-%m-%d") ... ... 
@@ -86,14 +120,17 @@ def kmeans(startPeriod, endPeriod, indicators_list): indicators += "," + ind indicators_list_numeric.append(ind) nb_indicators += 1 # print(indicators, nb_indicators) table = getDatasForPeriod(startPeriod, endPeriod, indicators) table = list(table) # Get the map with all deciles for all stations and indicators table_decile = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric) print (table_decile) table_deciles = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric) station_with_center = applyKmeans(table_deciles, nb_indicators, indicators_list_numeric, startPeriod, endPeriod) if station_with_center != None: print(f"Voici les villes et le cluster auxquelles elles appartiennent:") print(f"{station_with_center}") if __name__ == '__main__': cluster = Cluster() ... ... @@ -101,5 +138,6 @@ if __name__ == '__main__': session.set_keyspace("bazinsim_roisinos_metar") print() # kmeans("2001-01-01", "2010-12-31", ["tmpf", "skyc1"]) kmeans("2001-01-01", "2010-12-31", ["tmpf", "dwpf", "skyc1"]) print()
