Commit 51e0479e authored by sim-baz
Browse files

Kmeans valid, giving station and its centroid

parent 67ccb854
from cassandra.cluster import Cluster from cassandra.cluster import Cluster
from datetime import datetime from datetime import datetime
from sklearn.cluster import KMeans
import numpy as np
import loading as l import loading as l
import history as h import history as h
...@@ -53,10 +56,41 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica ...@@ -53,10 +56,41 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica
deciles[station].append({indicators_list[i] : []}) deciles[station].append({indicators_list[i] : []})
# Compute deciles, from 0 to 10 (= includes min and max) # Compute deciles, from 0 to 10 (= includes min and max)
for d in range(11): for d in range(11):
deciles[station][i][indicators_list[i]].append(l[t[3]][i][indicators_list[i]][len(l[t[3]][i][indicators_list[i]]) // 10 * d]) if d == 10:
deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) - 1])
else:
deciles[station][i][indicators_list[i]].append(l[station][i][indicators_list[i]][len(l[station][i][indicators_list[i]]) // 10 * d])
return deciles return deciles
def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod, nb_clusters=3):
    """Cluster stations with k-means using their concatenated decile values.

    Args:
        deciles: mapping ``station -> list`` where entry ``i`` of the list is
            a dict ``{indicators_list[i]: [decile values]}``.
        nb_indicators: number of indicators to read for each station.
        indicators_list: indicator names, in the same order as the per-station
            lists stored in ``deciles``.
        startPeriod: start of the period; only used in the warning message.
        endPeriod: end of the period; only used in the warning message.
        nb_clusters: number of clusters to build. It is also the minimum
            number of stations required to run the clustering. Defaults to 3,
            the value that was previously hard-coded in the KMeans call.

    Returns:
        A dict mapping each station to its cluster label, or ``None`` when
        there are fewer stations than ``nb_clusters``.
    """
    # Feature matrix (one flat row per station) and the station names, kept in
    # the same order so that labels can be matched back to stations.
    table = []
    stations_name = []
    for station in deciles:
        t = []
        stations_name.append(station)
        # Concatenate the deciles of every indicator into one feature vector.
        for i in range(nb_indicators):
            t += deciles[station][i][indicators_list[i]]
        print(t)
        table.append(t)

    # k-means cannot build more clusters than there are samples.
    if len(stations_name) < nb_clusters:
        print(f"Le nombre de villes ayant des données est trop inférieur ({len(stations_name)}) pour appliquer les kmeans pour la période du {startPeriod} au {endPeriod}")
        return None

    # Use the same cluster count as the guard above so both always agree.
    kmeans = KMeans(n_clusters=nb_clusters, max_iter=100).fit(table)

    # labels_ follows the row order of `table`, i.e. the order of stations_name.
    return dict(zip(stations_name, kmeans.labels_))
def kmeans(startPeriod, endPeriod, indicators_list): def kmeans(startPeriod, endPeriod, indicators_list):
startDate = datetime.strptime(startPeriod, "%Y-%m-%d") startDate = datetime.strptime(startPeriod, "%Y-%m-%d")
...@@ -86,14 +120,17 @@ def kmeans(startPeriod, endPeriod, indicators_list): ...@@ -86,14 +120,17 @@ def kmeans(startPeriod, endPeriod, indicators_list):
indicators += "," + ind indicators += "," + ind
indicators_list_numeric.append(ind) indicators_list_numeric.append(ind)
nb_indicators += 1 nb_indicators += 1
# print(indicators, nb_indicators)
table = getDatasForPeriod(startPeriod, endPeriod, indicators) table = getDatasForPeriod(startPeriod, endPeriod, indicators)
table = list(table) table = list(table)
# Get the map with all deciles for all stations and indicators # Get the map with all deciles for all stations and indicators
table_decile = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric) table_deciles = getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric)
print (table_decile)
station_with_center = applyKmeans(table_deciles, nb_indicators, indicators_list_numeric, startPeriod, endPeriod)
if station_with_center != None:
print(f"Voici les villes et le cluster auxquelles elles appartiennent:")
print(f"{station_with_center}")
if __name__ == '__main__': if __name__ == '__main__':
cluster = Cluster() cluster = Cluster()
...@@ -101,5 +138,6 @@ if __name__ == '__main__': ...@@ -101,5 +138,6 @@ if __name__ == '__main__':
session.set_keyspace("bazinsim_roisinos_metar") session.set_keyspace("bazinsim_roisinos_metar")
print() print()
# kmeans("2001-01-01", "2010-12-31", ["tmpf", "skyc1"])
kmeans("2001-01-01", "2010-12-31", ["tmpf", "dwpf", "skyc1"]) kmeans("2001-01-01", "2010-12-31", ["tmpf", "dwpf", "skyc1"])
print() print()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment