Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
N
nf26_projet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Romain Creuzenet
nf26_projet
Commits
2c17dd6c
Commit
2c17dd6c
authored
Jun 18, 2019
by
Romain Creuzenet
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cluster all attributs
parent
b1dc8612
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
71 additions
and
43 deletions
+71
-43
.idea/workspace.xml
.idea/workspace.xml
+22
-11
main.py
main.py
+41
-31
parameters.py
parameters.py
+8
-1
No files found.
.idea/workspace.xml
View file @
2c17dd6c
...
...
@@ -2,8 +2,8 @@
<project
version=
"4"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"dec891dc-2fad-4291-af33-64d4fd64029d"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/main.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/main.py"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/parameters.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/parameters.py"
afterDir=
"false"
/>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
<option
name=
"SHOW_DIALOG"
value=
"false"
/>
...
...
@@ -16,7 +16,7 @@
<file
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/create_table.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"912"
>
<state>
<caret
line=
"51"
column=
"17"
selection-start-line=
"51"
selection-start-column=
"17"
selection-end-line=
"51"
selection-end-column=
"17"
/>
</state>
</provider>
...
...
@@ -25,10 +25,12 @@
<file
pinned=
"false"
current-in-tab=
"true"
>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
273
"
>
<caret
line=
"
328"
column=
"34"
selection-start-line=
"328"
selection-start-column=
"34"
selection-end-line=
"328"
selection-end-column=
"34
"
/>
<state
relative-caret-position=
"
659
"
>
<caret
line=
"
297"
column=
"51"
selection-start-line=
"297"
selection-start-column=
"51"
selection-end-line=
"297"
selection-end-column=
"51
"
/>
<folding>
<element
signature=
"e#45#96#0"
expanded=
"true"
/>
<element
signature=
"e#3287#4955#0"
/>
<element
signature=
"e#4984#7500#0"
/>
</folding>
</state>
</provider>
...
...
@@ -46,8 +48,8 @@
<file
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/parameters.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
1216
"
>
<caret
line=
"
67"
column=
"11"
selection-start-line=
"67"
selection-start-column=
"5"
selection-end-line=
"67"
selection-end-column=
"16
"
/>
<state
relative-caret-position=
"
228
"
>
<caret
line=
"
15"
column=
"1"
lean-forward=
"true"
selection-start-line=
"15"
selection-start-column=
"1"
selection-end-line=
"15"
selection-end-column=
"1
"
/>
</state>
</provider>
</entry>
...
...
@@ -213,7 +215,7 @@
</entry>
<entry
file=
"file://$PROJECT_DIR$/create_table.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"912"
>
<state>
<caret
line=
"51"
column=
"17"
selection-start-line=
"51"
selection-start-column=
"17"
selection-end-line=
"51"
selection-end-column=
"17"
/>
</state>
</provider>
...
...
@@ -225,19 +227,28 @@
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/env_nf26/lib/python3.6/site-packages/cassandra/cluster.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"291"
>
<caret
line=
"140"
column=
"8"
selection-start-line=
"140"
selection-start-column=
"8"
selection-end-line=
"140"
selection-end-column=
"8"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/parameters.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
1216
"
>
<caret
line=
"
67"
column=
"11"
selection-start-line=
"67"
selection-start-column=
"5"
selection-end-line=
"67"
selection-end-column=
"16
"
/>
<state
relative-caret-position=
"
228
"
>
<caret
line=
"
15"
column=
"1"
lean-forward=
"true"
selection-start-line=
"15"
selection-start-column=
"1"
selection-end-line=
"15"
selection-end-column=
"1
"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/main.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
273
"
>
<caret
line=
"
328"
column=
"34"
selection-start-line=
"328"
selection-start-column=
"34"
selection-end-line=
"328"
selection-end-column=
"34
"
/>
<state
relative-caret-position=
"
659
"
>
<caret
line=
"
297"
column=
"51"
selection-start-line=
"297"
selection-start-column=
"51"
selection-end-line=
"297"
selection-end-column=
"51
"
/>
<folding>
<element
signature=
"e#45#96#0"
expanded=
"true"
/>
<element
signature=
"e#3287#4955#0"
/>
<element
signature=
"e#4984#7500#0"
/>
</folding>
</state>
</provider>
...
...
main.py
View file @
2c17dd6c
"""File to execute to show results"""
# Data
from
parameters
import
SESSION
,
DIR_OUT
,
START
,
END
from
parameters
import
SESSION
,
DIR_OUT
,
START
,
END
,
ATTRIBUTS
# Basic
import
matplotlib.dates
as
mdates
import
matplotlib.pyplot
as
plt
...
...
@@ -62,14 +62,10 @@ def ask_d(text=">>> "):
def chose_attr():
    """Ask the user to pick one attribute of the table.

    Prints every entry of the module-level ATTRIBUTS mapping as
    "description : code", then delegates the prompt to ask_q, passing it the
    attribute codes as the set of accepted answers.

    :return: whatever ask_q returns for the accepted codes
    """
    # The available attributes come from the shared ATTRIBUTS mapping rather
    # than a function-local dictionary, so this menu lists the same attributes
    # the rest of the program works with.
    print("Choisissez un élément parmis les suivant :")
    for code, text in ATTRIBUTS.items():
        print("\t-", text, ":", code)
    return ask_q(ATTRIBUTS.keys())
def
ask_int
(
text
=
">>> "
):
...
...
@@ -88,6 +84,21 @@ def generate_color(i):
return
"#{:06x}"
.
format
(
random
.
randint
(
0
,
0xFFFFFF
))
def initialisation_centroid(data, attributes=None):
    """Generate one random centroid inside the bounding box of the data.

    For each attribute, the coordinate is drawn uniformly between the
    smallest and the largest value found for that attribute in ``data``.

    :param data: station : {attr1: 1, attr2: 2, ...}; every entry must hold a
        numeric value for each requested attribute
    :param attributes: iterable of attribute names to sample; when omitted,
        the keys of the module-level ATTRIBUTS mapping are used
    :return: {attr1: 1.0, attr2: 2.0, ...} with one float per attribute
    :raises ValueError: if ``data`` is empty
    """
    if attributes is None:
        attributes = ATTRIBUTS
    if not data:
        raise ValueError("Cannot initialise a centroid without station data")

    centroid = {}
    for attr in attributes:
        observed = [station[attr] for station in data.values()]
        # Keep the real bounds: truncating them to int could place the
        # centroid below the smallest observation and collapses narrow
        # ranges to one or two possible values.
        low = float(min(observed))
        high = float(max(observed))
        centroid[attr] = random.uniform(low, high)
    return centroid
class
Manager
:
table
=
None
# table name use by the function
...
...
@@ -236,8 +247,7 @@ class Manager:
# Ask information from user
print
(
"=== Choix 3 : CLUSTER ==="
)
attr
=
chose_attr
()
print
(
"Vous allez devoir choisir une période de temps. On considéra la moyenne de l'attribut sur cette "
print
(
"Vous allez devoir choisir une période de temps. On considéra la moyenne des attributs sur cette "
"période de temps"
)
date_begin
=
date_end
=
None
while
date_begin
is
None
or
date_begin
>=
date_end
:
...
...
@@ -249,60 +259,61 @@ class Manager:
nb_cluster
=
ask_int
()
# Calc of mean
query
=
"SELECT station, lon, lat, {attr} FROM {table} WHERE time >= {begin} AND time <= {end}
ALLOW FILTERING
"
\
""
.
format
(
attr
=
attr
,
query
=
"SELECT station, lon, lat, {attr} FROM {table} WHERE time >= {begin} AND time <= {end} "
\
"
ALLOW FILTERING
"
.
format
(
attr
=
", "
.
join
(
ATTRIBUTS
.
keys
())
,
table
=
self
.
table
,
begin
=
date_begin
,
end
=
date_end
)
stations
=
{}
# station: {'nb': 3, '
sum': 5,
'lon': 3.27, 'lat': 12}
stations
=
{}
# station: {'nb': 3, '
attr1': 5, 'attr2': 7, ...,
'lon': 3.27, 'lat': 12}
for
row
in
execute_query
(
query
):
if
None
in
(
row
.
station
,
row
.
lon
,
row
.
lat
,
getattr
(
row
,
attr
))
:
if
None
in
[
row
.
station
,
row
.
lon
,
row
.
lat
]
+
[
getattr
(
row
,
attr
)
for
attr
in
ATTRIBUTS
.
keys
()]
:
continue
if
row
.
station
in
stations
:
stations
[
row
.
station
][
'sum'
]
+=
getattr
(
row
,
attr
)
for
attr
in
ATTRIBUTS
.
keys
():
stations
[
row
.
station
][
attr
]
+=
getattr
(
row
,
attr
)
stations
[
row
.
station
][
'nb'
]
+=
1
else
:
stations
[
row
.
station
]
=
{
'nb'
:
1
,
'sum'
:
getattr
(
row
,
attr
),
'lon'
:
row
.
lon
,
'lat'
:
row
.
lat
}
stations
[
row
.
station
]
=
{
'nb'
:
1
,
'lon'
:
row
.
lon
,
'lat'
:
row
.
lat
,
**
{
key
:
0
for
key
in
ATTRIBUTS
.
keys
()}}
for
value
in
stations
.
values
():
value
[
'mean'
]
=
value
[
'sum'
]
/
value
[
'nb'
]
means
=
[
elt
[
'mean'
]
for
elt
in
stations
.
values
()
]
for
attr
in
ATTRIBUTS
.
keys
():
value
[
attr
]
=
value
[
attr
]
/
value
[
'nb'
]
# Initialisation mean
mini
=
int
(
min
(
means
))
maxi
=
int
(
max
(
means
))
old_centroids
=
None
new_centroids
=
[
random
.
randint
(
mini
,
maxi
)
initialisation_centroid
(
stations
)
for
_
in
range
(
nb_cluster
)
]
while
old_centroids
!=
new_centroids
:
old_centroids
=
new_centroids
data
=
[
{
'sum'
:
0
,
'nb'
:
0
}
{
**
{
attr
:
0
for
attr
in
ATTRIBUTS
.
keys
()}
,
'nb'
:
0
}
for
_
in
range
(
nb_cluster
)
]
# could be parallelize
for
m
in
means
:
for
value_station
in
stations
.
values
()
:
distances
=
[
(
m
-
centroid
)
**
2
sum
([(
centroid
[
attr
]
-
value_station
[
attr
])
**
2
for
attr
in
ATTRIBUTS
.
keys
()])
for
centroid
in
old_centroids
]
i
=
distances
.
index
(
min
(
distances
))
data
[
i
][
'sum'
]
+=
m
for
attr
in
ATTRIBUTS
.
keys
():
data
[
i
][
attr
]
+=
value_station
[
attr
]
data
[
i
][
'nb'
]
+=
1
# end calc parallelize
if
0
in
[
value
[
'nb'
]
for
value
in
data
]:
# cluster empty do it again
new_centroids
=
[
random
.
randint
(
int
(
min
(
means
),
int
(
max
(
means
)))
)
initialisation_centroid
(
stations
)
for
_
in
range
(
nb_cluster
)
]
else
:
new_centroids
=
[
float
(
"{0:.2f}"
.
format
(
elt
[
'sum'
]
/
elt
[
'nb'
]))
{
attr
:
float
(
"{0:.2f}"
.
format
(
elt
[
attr
]
/
elt
[
'nb'
]))
for
attr
in
ATTRIBUTS
.
keys
()}
for
elt
in
data
]
...
...
@@ -332,7 +343,7 @@ class Manager:
for
station
,
value
in
stations
.
items
():
# Analyse the point
distances
=
[
(
value
[
'mean'
]
-
centroid
)
**
2
sum
([(
centroid
[
attr
]
-
value
[
attr
])
**
2
for
attr
in
ATTRIBUTS
.
keys
()])
for
centroid
in
old_centroids
]
i
=
distances
.
index
(
min
(
distances
))
...
...
@@ -340,11 +351,10 @@ class Manager:
# Add the point
x
,
y
=
the_map
(
value
[
'lon'
],
value
[
'lat'
])
the_map
.
plot
(
x
,
y
,
marker
=
"."
,
color
=
colors
[
i
])
plt
.
annotate
(
"{}
: {:.2f}"
.
format
(
station
,
value
[
'mean'
]
),
(
x
,
y
),
color
=
colors
[
i
])
plt
.
annotate
(
"{}
"
.
format
(
station
),
(
x
,
y
),
color
=
colors
[
i
])
title
=
"{nb_cluster} clusters d
e {attr} d
u {begin} au {end}"
.
format
(
title
=
"{nb_cluster} clusters du {begin} au {end}"
.
format
(
nb_cluster
=
nb_cluster
,
attr
=
attr
,
begin
=
datetime
(
*
list
(
date_begin
)).
strftime
(
'%Y-%m-%d %H:%M'
),
end
=
datetime
(
*
list
(
date_end
)).
strftime
(
'%Y-%m-%d %H:%M'
)
)
...
...
parameters.py
View file @
2c17dd6c
...
...
@@ -9,6 +9,11 @@ import datetime
# Can be specified
KEY_SPACE
=
(
"lhamadac_projet"
,
"nf26"
)
# For many computers
TABLE
=
"Spain"
ATTRIBUTS
=
{
"tmpf"
:
"La témparature (en Fahrenheit)"
,
"relh"
:
"L'humidité ( en %)"
,
"alti"
:
"Altimètre de pression en pouces"
}
# Don't change
SESSION
=
None
...
...
@@ -20,7 +25,7 @@ for key in KEY_SPACE:
else
:
break
if
SESSION
is
None
:
raise
NoHostAvailable
(
"Erreur de connection à cassandra"
)
raise
NoHostAvailable
(
"Erreur de connection à cassandra"
,
None
)
BASE_DIR
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
# path of project
DIR_DATA
=
os
.
path
.
join
(
BASE_DIR
,
"data"
)
# folder with all data station
...
...
@@ -68,3 +73,5 @@ TABLES = {
'TABLE_SPACE'
:
"station, time"
,
'TABLE_TIME'
:
"time, lon, lat"
,
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment