Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Simon Bazin
nf26-metar
Commits
51e0479e
Commit
51e0479e
authored
Jun 21, 2019
by
sim-baz
Browse files
Kmeans valid, giving station and its centroid
parent
67ccb854
Changes
1
Hide whitespace changes
Inline
Side-by-side
kmeans.py
View file @
51e0479e
from
cassandra.cluster
import
Cluster
from
datetime
import
datetime
from
sklearn.cluster
import
KMeans
import
numpy
as
np
import
loading
as
l
import
history
as
h
...
...
@@ -53,10 +56,41 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica
deciles
[
station
].
append
({
indicators_list
[
i
]
:
[]})
# Compute deciles, from 0 to 10 (= includes min and max)
for
d
in
range
(
11
):
deciles
[
station
][
i
][
indicators_list
[
i
]].
append
(
l
[
t
[
3
]][
i
][
indicators_list
[
i
]][
len
(
l
[
t
[
3
]][
i
][
indicators_list
[
i
]])
//
10
*
d
])
if
d
==
10
:
deciles
[
station
][
i
][
indicators_list
[
i
]].
append
(
l
[
station
][
i
][
indicators_list
[
i
]][
len
(
l
[
station
][
i
][
indicators_list
[
i
]])
-
1
])
else
:
deciles
[
station
][
i
][
indicators_list
[
i
]].
append
(
l
[
station
][
i
][
indicators_list
[
i
]][
len
(
l
[
station
][
i
][
indicators_list
[
i
]])
//
10
*
d
])
return
deciles
def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod,
                nb_clusters=3):
    """Cluster stations with k-means using their concatenated decile values.

    Args:
        deciles: mapping ``station -> list`` where entry ``i`` of the list is
            a one-key dict ``{indicators_list[i]: [values...]}``.
        nb_indicators: number of leading entries of ``indicators_list`` to use.
        indicators_list: indicator names, in the same order as the per-station
            lists stored in ``deciles``.
        startPeriod: start of the period; only used in the warning message.
        endPeriod: end of the period; only used in the warning message.
        nb_clusters: number of clusters to fit. Defaults to 3, the value that
            was previously hard-coded in the ``KMeans`` call.

    Returns:
        A dict mapping each station to its cluster label, or ``None`` when
        there are fewer stations than requested clusters.
    """
    # Flatten the nested map into one feature row per station, keeping the
    # station names in a parallel list so labels can be matched back later.
    stations_name = []
    table = []
    for station, station_deciles in deciles.items():
        row = []
        for i in range(nb_indicators):
            row += station_deciles[i][indicators_list[i]]
        print(row)
        stations_name.append(station)
        table.append(row)

    # k-means cannot build more clusters than there are samples. The guard
    # uses the same cluster count that is handed to KMeans below, so the two
    # can no longer disagree.
    if len(stations_name) < nb_clusters:
        print(
            f"Le nombre de villes ayant des données est trop inférieur "
            f"({len(stations_name)}) pour appliquer les kmeans pour la "
            f"période du {startPeriod} au {endPeriod}"
        )
        return None

    kmeans = KMeans(n_clusters=nb_clusters, max_iter=100).fit(table)

    # labels_ is ordered like the rows of `table`, i.e. like `stations_name`.
    return dict(zip(stations_name, kmeans.labels_))
def
kmeans
(
startPeriod
,
endPeriod
,
indicators_list
):
startDate
=
datetime
.
strptime
(
startPeriod
,
"%Y-%m-%d"
)
...
...
@@ -86,14 +120,17 @@ def kmeans(startPeriod, endPeriod, indicators_list):
indicators
+=
","
+
ind
indicators_list_numeric
.
append
(
ind
)
nb_indicators
+=
1
# print(indicators, nb_indicators)
table
=
getDatasForPeriod
(
startPeriod
,
endPeriod
,
indicators
)
table
=
list
(
table
)
# Get the map with all deciles for all stations and indicators
table_decile
=
getDecileForAllStations
(
startPeriod
,
endPeriod
,
table
,
nb_indicators
,
indicators_list_numeric
)
print
(
table_decile
)
table_deciles
=
getDecileForAllStations
(
startPeriod
,
endPeriod
,
table
,
nb_indicators
,
indicators_list_numeric
)
station_with_center
=
applyKmeans
(
table_deciles
,
nb_indicators
,
indicators_list_numeric
,
startPeriod
,
endPeriod
)
if
station_with_center
!=
None
:
print
(
f
"Voici les villes et le cluster auxquelles elles appartiennent:"
)
print
(
f
"
{
station_with_center
}
"
)
if
__name__
==
'__main__'
:
cluster
=
Cluster
()
...
...
@@ -101,5 +138,6 @@ if __name__ == '__main__':
session
.
set_keyspace
(
"bazinsim_roisinos_metar"
)
print
()
# kmeans("2001-01-01", "2010-12-31", ["tmpf", "skyc1"])
kmeans
(
"2001-01-01"
,
"2010-12-31"
,
[
"tmpf"
,
"dwpf"
,
"skyc1"
])
print
()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment