Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Simon Bazin
nf26-metar
Commits
67ccb854
Commit
67ccb854
authored
Jun 21, 2019
by
sim-baz
Browse files
Starting kmeans function
Getting deciles for stations and indicators
parent
563b636a
Changes
3
Hide whitespace changes
Inline
Side-by-side
history.py
View file @
67ccb854
...
...
@@ -24,7 +24,7 @@ def getMeanByDay(table, dateMin, dateMax):
# Verify the measures is for the period chosen by user and have a value
if
year
>=
dateMin
and
year
<
dateMax
and
r
[
len
(
r
)
-
1
]
!=
None
:
# convert attributes to date format as string
date
=
str
(
r
[
0
])
+
"-"
+
str
(
r
[
1
])
+
"-"
+
str
(
r
[
2
])
date
=
str
(
r
[
0
])
+
"-"
+
"0"
*
(
2
-
len
(
str
(
r
[
1
])))
+
str
(
r
[
1
])
+
"-"
+
"0"
*
(
2
-
len
(
str
(
r
[
2
])))
+
str
(
r
[
2
])
if
date
not
in
table_date
.
keys
():
table_date
[
date
]
=
0
,
0
table_date
[
date
]
=
(
table_date
[
date
][
0
]
+
r
[
len
(
r
)
-
1
],
table_date
[
date
][
1
]
+
1
)
...
...
@@ -52,17 +52,23 @@ def getMeanByMonth(table):
return
table_month
# Verify the validity of the years given
def verifyYearValidity(dateMin, dateMax):
    """Tell whether two years form an acceptable period.

    Prints an explanatory message (in French) and returns False when the
    two years are equal, or when either of them is below MIN_DATE or above
    MAX_DATE + 1. Returns True otherwise.
    """
    # Equal years are rejected.
    if dateMin == dateMax:
        print("Les dates ne doivent pas être égales")
        return False

    # Each bound must stay inside the covered years; MAX_DATE + 1 is still
    # accepted as a bound.
    out_of_range = any(
        year < MIN_DATE or year > (MAX_DATE + 1)
        for year in (dateMin, dateMax)
    )
    if out_of_range:
        print(f"Les dates doivent être comprises entre {MIN_DATE} et {MAX_DATE}")
        return False

    return True
def
plotHistory
(
station
,
indicator
,
dateMin
,
dateMax
):
# Accept only indicator with numeric values (not factors)
if
indicator
in
numeric_columns
:
# Verification to ensure the validity of parameters, dates not equal
if
dateMin
==
dateMax
:
print
(
f
"Les dates ne doivent pas être égales"
)
return
# Verification to ensure the validity of parameters, dates in the right period
if
dateMin
<
MIN_DATE
or
dateMin
>
(
MAX_DATE
+
1
)
or
dateMax
<
MIN_DATE
or
dateMax
>
(
MAX_DATE
+
1
):
print
(
f
"Les dates doivent être comprises entre
{
MIN_DATE
}
et
{
MAX_DATE
}
"
)
if
not
verifyYearValidity
(
dateMin
,
dateMax
):
return
# Get datas from cassandra table
...
...
@@ -71,12 +77,15 @@ def plotHistory(station, indicator, dateMin, dateMax):
# If no data for the period selected
if
not
table
:
print
(
f
"Aucune donnée pour la station
{
station
}
et pour l'indicateur
{
indicator
}
"
)
print
(
f
"Aucune donnée pour la station
{
station
}
et pour l'indicateur
{
indicator
}
et pour la période
{
dateMin
}
-
{
dateMax
}
"
)
return
# Treat datas
table_mean
=
getMeanByDay
(
table
,
dateMin
,
dateMax
)
table_mean_by_month
=
getMeanByMonth
(
table
)
if
not
table_mean
or
not
table_mean_by_month
:
print
(
f
"Aucune donnée pour la station
{
station
}
et pour l'indicateur
{
indicator
}
et pour la période
{
dateMin
}
-
{
dateMax
}
"
)
return
# Duplicate list for each year in the period required
liste
=
[]
...
...
@@ -105,7 +114,7 @@ def plotHistory(station, indicator, dateMin, dateMax):
graduation
=
[
"20"
+
"0"
*
(
2
-
len
(
str
(
i
)))
+
str
(
i
)
+
"-01-01"
for
i
in
range
(
int
(
str
(
dateMin
)[
2
:
4
]),
int
(
str
(
dateMax
)[
2
:
4
]),
step_graduation
)]
# Add the last value of graduation for the last day of measures
graduation
.
append
(
"20"
+
"0"
*
(
2
-
len
(
str
(
dateMax
-
1
)[
2
:
4
]))
+
str
(
dateMax
-
1
)[
2
:
4
]
+
"-12-31"
)
# Plot, with both measures and season mean
fig
,
ax1
=
plt
.
subplots
()
# Measures on axis 1
...
...
@@ -140,5 +149,5 @@ if __name__ == '__main__':
session
.
set_keyspace
(
"bazinsim_roisinos_metar"
)
print
()
plotHistory
(
"EFKI"
,
"tmpf"
,
2001
,
200
5
)
plotHistory
(
"EFKI"
,
"tmpf"
,
2001
,
200
4
)
print
()
\ No newline at end of file
kmeans.py
0 → 100644
View file @
67ccb854
from
cassandra.cluster
import
Cluster
from
datetime
import
datetime
import
loading
as
l
import
history
as
h
def getDatasForPeriod(startPeriod, endPeriod, indicators):
    """Fetch the rows of every year touched by the period.

    startPeriod / endPeriod: strings whose first four characters are the year.
    indicators: comma-separated column names inserted as-is in the SELECT.

    One query is run per year through the module-level ``session``. The
    queries filter on the year only, so the result holds whole years; no
    month/day filtering happens here. Returns a list of the rows, each laid
    out as (year, month, day, station, <indicators...>).
    """
    first_year = int(startPeriod[:4])
    last_year = int(endPeriod[:4])

    rows = []
    for year in range(first_year, last_year + 1):
        query = f"SELECT year, month, day, station, {indicators} FROM {l.table_name_date} where year = {year}"
        rows.extend(session.execute(query))
    return rows
def verifyDateInPeriod(startPeriod, endPeriod, year, month, day):
    """Return True when year-month-day lies inside [startPeriod, endPeriod].

    startPeriod / endPeriod: "YYYY-MM-DD" strings; both bounds are inclusive.
    year, month, day: the date to test, each given either as a string or as
    an int (they are formatted into a "Y-m-d" string before parsing).

    Raises ValueError (from ``datetime.strptime``) when any of the three
    dates cannot be parsed.
    """
    date_format = "%Y-%m-%d"
    # The f-string accepts int as well as str components.
    date = datetime.strptime(f"{year}-{month}-{day}", date_format)
    dateStart = datetime.strptime(startPeriod, date_format)
    dateEnd = datetime.strptime(endPeriod, date_format)
    return dateStart <= date <= dateEnd
def _decilesOfSorted(sorted_values):
    """Return the 11 deciles (minimum, 9 cut points, maximum) of a sorted list."""
    if not sorted_values:
        return []
    last_index = len(sorted_values) - 1
    # d = 0 maps to the first element and d = 10 to the last one, so the
    # index can never run past the end of the list.
    return [sorted_values[last_index * d // 10] for d in range(11)]


def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list):
    """Compute, for every station, the deciles of each indicator.

    startPeriod / endPeriod: "YYYY-MM-DD" strings, both bounds inclusive.
    table: iterable of rows laid out as
        (year, month, day, station, <indicator values...>).
    nb_indicators: number of indicator columns present in each row.
    indicators_list: indicator names, in the same order as the row columns.

    Rows dated outside the period and None measurements are ignored.

    Returns a map from station to a list holding one single-key map per
    indicator, each giving the 11 deciles (minimum and maximum included):
        {'EFKI': [{'tmpf': [d0, ..., d10]}, {'dwpf': [d0, ..., d10]}], ...}
    An indicator without any measurement for a station gets an empty list.
    """
    date_format = "%Y-%m-%d"
    # The period bounds do not depend on the row: parse them once.
    period_start = datetime.strptime(startPeriod, date_format)
    period_end = datetime.strptime(endPeriod, date_format)
    indicator_names = list(indicators_list[:nb_indicators])

    # station -> one list of values per indicator. The lists are created
    # for every indicator up front, so their positions always line up with
    # indicator_names even when a row has no value for some of them.
    values_by_station = {}
    for row in table:
        row_date = datetime(int(row[0]), int(row[1]), int(row[2]))
        if not period_start <= row_date <= period_end:
            continue
        station = row[3]
        if station not in values_by_station:
            values_by_station[station] = [[] for _ in indicator_names]
        for position, values in enumerate(values_by_station[station]):
            measure = row[4 + position]
            if measure is not None:
                values.append(float(measure))

    # Each station is summarised from its own values.
    deciles = {}
    for station, station_values in values_by_station.items():
        deciles[station] = [
            {name: _decilesOfSorted(sorted(values))}
            for name, values in zip(indicator_names, station_values)
        ]
    return deciles
def kmeans(startPeriod, endPeriod, indicators_list):
    """Print the per-station deciles of the numeric indicators over a period.

    startPeriod / endPeriod: "YYYY-MM-DD" strings; both must lie between
    l.FIRST_DAY and l.LAST_DAY.
    indicators_list: indicator names; only those found in l.numeric_columns
    are kept.

    Prints a message (in French) and returns None when the dates are
    rejected or when no numeric indicator remains. Otherwise prints the
    decile map built by getDecileForAllStations.
    """
    date_format = "%Y-%m-%d"
    startDate = datetime.strptime(startPeriod, date_format)
    endDate = datetime.strptime(endPeriod, date_format)
    firstDate = datetime.strptime(l.FIRST_DAY, date_format)
    lastDate = datetime.strptime(l.LAST_DAY, date_format)

    # Both bounds must fall inside the range covered by the data set.
    if not (firstDate <= startDate <= lastDate and firstDate <= endDate <= lastDate):
        print(f"Les dates doivent être comprises entre {l.FIRST_DAY} et {l.LAST_DAY}")
        return

    # NOTE(review): verifyYearValidity rejects equal years, so a period that
    # starts and ends in the same year is refused here - confirm this is
    # intended for kmeans.
    if not h.verifyYearValidity(int(startPeriod[0:4]), int(endPeriod[0:4])):
        return

    # Keep only the numeric indicators, in the order they were given.
    indicators_list_numeric = [ind for ind in indicators_list if ind in l.numeric_columns]
    if not indicators_list_numeric:
        # An empty column list would produce an invalid SELECT statement.
        print("Aucun indicateur numérique parmi les indicateurs demandés")
        return
    nb_indicators = len(indicators_list_numeric)
    indicators = ",".join(indicators_list_numeric)

    table = getDatasForPeriod(startPeriod, endPeriod, indicators)

    # Get the map with all deciles for all stations and indicators
    table_decile = getDecileForAllStations(
        startPeriod, endPeriod, table, nb_indicators, indicators_list_numeric
    )
    print(table_decile)
if __name__ == "__main__":
    # Connect to Cassandra; ``session`` stays a module-level name because
    # getDatasForPeriod reads it as a global.
    cluster = Cluster()
    session = cluster.connect()
    session.set_keyspace("bazinsim_roisinos_metar")

    print()
    # Sample run; "skyc1" is included in the requested indicators.
    kmeans(
        "2001-01-01",
        "2010-12-31",
        ["tmpf", "dwpf", "skyc1"],
    )
    print()
loading.py
View file @
67ccb854
...
...
@@ -11,6 +11,9 @@ numeric_columns = ["lon","lat","tmpf","dwpf","relh","drct","sknt","p01i","alti",
MIN_DATE
=
2001
MAX_DATE
=
2010
FIRST_DAY
=
"2001-01-01"
LAST_DAY
=
"2010-12-31"
# Country: Finland
# Dates : 2001 to 2010
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment