Commit 51e0479e authored by sim-baz

Kmeans valid, giving station and its centroid

parent 67ccb854
from cassandra.cluster import Cluster
from datetime import datetime
from sklearn.cluster import KMeans
import numpy as np
import loading as l
import history as h
......@@ -53,10 +56,41 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica
deciles[station].append({indicators_list[i] : []})
# Compute deciles, from 0 to 10 (= includes min and max)
for d in range(11):
deciles[station][i][indicators_list[i]].append(l[t[3]][i][indicators_list[i]][len(l[t[3]][i][indicators_list[i]]) // 10 * d])
if d == 10:
deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) - 1])
else:
deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) // 10 * d])
return deciles
def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod, nb_clusters=3):
    """Cluster stations with k-means on their flattened decile vectors.

    Args:
        deciles: mapping station -> list where entry ``i`` is a one-key dict
            ``{indicators_list[i]: [decile values]}``.
        nb_indicators: number of indicators to read for each station.
        indicators_list: indicator names, in the same order as the entries
            of each station's list in ``deciles``.
        startPeriod: start of the period; only used in the warning message.
        endPeriod: end of the period; only used in the warning message.
        nb_clusters: number of clusters to build. It drives both the
            "enough stations" guard and the KMeans model, so the two can
            never disagree. Defaults to 3, the value previously hard-coded.

    Returns:
        A dict mapping each station to the cluster label KMeans assigned to
        it, or ``None`` when there are fewer stations than clusters.
    """
    # Station names, kept in the same order as the rows of `table`
    stations_name = []
    # One flat feature row per station (the deciles without the nested maps)
    table = []
    for station in deciles:
        row = []
        for i in range(nb_indicators):
            row += deciles[station][i][indicators_list[i]]
        print (row)
        stations_name.append(station)
        table.append(row)

    # KMeans cannot build more clusters than there are samples
    if len(stations_name) < nb_clusters:
        print(f"Le nombre de villes ayant des données est trop inférieur ({len(stations_name)}) pour appliquer les kmeans pour la période du {startPeriod} au {endPeriod}")
        return None

    # Named `model` so the module-level kmeans() function is not shadowed
    model = KMeans(n_clusters=nb_clusters, max_iter=100).fit(table)
    # labels_[k] is the cluster of the k-th row, i.e. of stations_name[k]
    return dict(zip(stations_name, model.labels_))
def kmeans(startPeriod, endPeriod, indicators_list):
startDate = datetime.strptime(startPeriod, "%Y-%m-%d")
......@@ -86,14 +120,17 @@ def kmeans(startPeriod, endPeriod, indicators_list):
indicators += "," + ind
indicators_list_numeric.append(ind)
nb_indicators += 1
# print(indicators, nb_indicators)
table = getDatasForPeriod(startPeriod, endPeriod, indicators)
table = list(table)
# Get the map with all deciles for all stations and indicators
table_decile = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric)
print (table_decile)
table_deciles = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric)
station_with_center = applyKmeans(table_deciles, nb_indicators, indicators_list_numeric, startPeriod, endPeriod)
if station_with_center != None:
print(f"Voici les villes et le cluster auxquelles elles appartiennent:")
print(f"{station_with_center}")
if __name__ == '__main__':
cluster = Cluster()
......@@ -101,5 +138,6 @@ if __name__ == '__main__':
session.set_keyspace("bazinsim_roisinos_metar")
print()
# kmeans("2001-01-01", "2010-12-31", ["tmpf", "skyc1"])
kmeans("2001-01-01", "2010-12-31", ["tmpf", "dwpf", "skyc1"])
print()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment