Commit dfbf01eb authored by Romain Creuzenet's avatar Romain Creuzenet

cluster

parent 5ed4567c
......@@ -2,9 +2,6 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="dec891dc-2fad-4291-af33-64d4fd64029d" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.~lock.Rapport.odt#" beforeDir="false" afterPath="$PROJECT_DIR$/.~lock.Rapport.odt#" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Rapport.odt" beforeDir="false" afterPath="$PROJECT_DIR$/Rapport.odt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
......@@ -18,7 +15,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/create_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="811">
<state relative-caret-position="912">
<caret line="51" column="17" selection-start-line="51" selection-start-column="17" selection-end-line="51" selection-end-column="17" />
</state>
</provider>
......@@ -27,10 +24,10 @@
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361">
<caret line="136" column="18" lean-forward="true" selection-start-line="136" selection-start-column="18" selection-end-line="136" selection-end-column="18" />
<state relative-caret-position="273">
<caret line="313" selection-start-line="313" selection-end-line="313" />
<folding>
<element signature="e#45#75#0" expanded="true" />
<element signature="e#45#96#0" expanded="true" />
</folding>
</state>
</provider>
......@@ -39,7 +36,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/download_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-804">
<state relative-caret-position="19">
<caret line="5" column="52" selection-start-line="5" selection-start-column="52" selection-end-line="5" selection-end-column="52" />
</state>
</provider>
......@@ -48,11 +45,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/parameters.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="513">
<caret line="27" selection-start-line="27" selection-end-line="27" />
<folding>
<element signature="e#62#99#0" expanded="true" />
</folding>
<state relative-caret-position="817">
<caret line="67" column="11" lean-forward="true" selection-start-line="67" selection-start-column="5" selection-end-line="67" selection-end-column="16" />
</state>
</provider>
</entry>
......@@ -71,6 +65,7 @@
<find>session</find>
<find>service</find>
<find>figure</find>
<find>print</find>
</findStrings>
<replaceStrings>
<replace>SESSION</replace>
......@@ -103,8 +98,8 @@
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane" />
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
......@@ -197,48 +192,51 @@
<entry file="file://$PROJECT_DIR$/data/LEGA_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LEGE_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LEBZ_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LFLD_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/data/LFGA_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/data/LFLA_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/data/LFOI_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor" />
<entry file="file://$PROJECT_DIR$/data/LFLD_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LFGA_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LFLA_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/data/LFOI_2001_2010.csv" />
<entry file="file://$PROJECT_DIR$/create_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="912">
<caret line="51" column="17" selection-start-line="51" selection-start-column="17" selection-end-line="51" selection-end-column="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/download_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-804">
<state relative-caret-position="19">
<caret line="5" column="52" selection-start-line="5" selection-start-column="52" selection-end-line="5" selection-end-column="52" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/create_table.py">
<entry file="file://$PROJECT_DIR$/env_nf26/lib/python3.6/site-packages/mpl_toolkits/basemap/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="811">
<caret line="51" column="17" selection-start-line="51" selection-start-column="17" selection-end-line="51" selection-end-column="17" />
<state relative-caret-position="-12363">
<caret line="585" column="8" selection-start-line="585" selection-start-column="8" selection-end-line="585" selection-end-column="8" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/env_nf26/lib/python3.6/site-packages/matplotlib/pyplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="291">
<caret line="2785" column="4" selection-start-line="2785" selection-start-column="4" selection-end-line="2785" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/parameters.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="513">
<caret line="27" selection-start-line="27" selection-end-line="27" />
<folding>
<element signature="e#62#99#0" expanded="true" />
</folding>
<state relative-caret-position="817">
<caret line="67" column="11" lean-forward="true" selection-start-line="67" selection-start-column="5" selection-end-line="67" selection-end-column="16" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/main.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361">
<caret line="136" column="18" lean-forward="true" selection-start-line="136" selection-start-column="18" selection-end-line="136" selection-end-column="18" />
<state relative-caret-position="273">
<caret line="313" selection-start-line="313" selection-end-line="313" />
<folding>
<element signature="e#45#75#0" expanded="true" />
<element signature="e#45#96#0" expanded="true" />
</folding>
</state>
</provider>
......
......@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
import warnings
import re
import os
import random
# Stats
import statsmodels.graphics as stm_graphs
import pandas as pd
......@@ -47,8 +48,15 @@ def ask_d(text=">>> "):
m = match.groupdict()
result = (int(m['year']), int(m['month']), int(m['day']), int(m['hour']), int(m['minute']))
return result
try:
date = datetime(*list(result))
if not START < date < END:
return ask_d(text)
except ValueError:
return ask_d(text)
else:
return result
def chose_attr():
......@@ -64,16 +72,50 @@ def chose_attr():
return ask_q(decision.keys())
def ask_cluster(text=">>> "):
def ask_int(text=">>> "):
    """Ask the user for a non-negative integer.

    The prompt is repeated until the answer is made of decimal digits only.

    :param text: prompt displayed to the user
    :return: the answer converted to an int
    """
    answer = ""
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as '²' that int() cannot convert, which ended in a ValueError.
    # The empty string is not decimal, so the first prompt is always shown.
    while not answer.isdecimal():
        answer = input(text)
    return int(answer)
def generate_color():
    """Return a random colour as a ``#rrggbb`` hexadecimal string."""
    rgb = random.randint(0, 0xFFFFFF)
    # Zero-padded to 6 lowercase hexadecimal digits
    return "#" + format(rgb, "06x")
def clear_zone(zone):
    """
    Reduce a zone to the closed outline of its bounding box.

    :param zone: iterable of (x, y) tuples; the x values end up in ``lons``
        and the y values in ``lats``
    :return: (lats, lons), two lists of 5 values describing the 4 corners of
        the bounding box plus the first corner again to close the outline;
        two empty lists when the zone holds no point
    """
    points = list(zone)
    if not points:
        # min() / max() raise ValueError on an empty sequence: nothing to outline
        return [], []
    xs = [x for x, _ in points]
    ys = [y for _, y in points]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    # Corners in order: (x_min, y_max), (x_max, y_max), (x_max, y_min),
    # (x_min, y_min), then back to the first one.
    lons = [x_min, x_max, x_max, x_min, x_min]
    lats = [y_max, y_max, y_min, y_min, y_max]
    return lats, lons
class Manager:
# Name of the table inserted into the queries; set by a method before it
# builds its query (e.g. cluster() sets it to "TABLE_TIME").
table = None
# Bounds of the drawn map: x values are longitudes and y values latitudes
# (they are handed to Basemap as the lower-left / upper-right corners).
# The data has a precision of 4 decimals.
x_min = -18.42
x_max = 10.35
y_min = 25.281898
y_max = 48.08
def run(self):
"""Chose objective"""
# Initialisation
......@@ -149,8 +191,14 @@ class Manager:
attr = chose_attr()
plt.figure(figsize=(14, 14))
the_map = Basemap(projection='mill', llcrnrlat=25.281898, llcrnrlon=-18.42, urcrnrlat=48.08, urcrnrlon=10.35,
resolution='l')
the_map = Basemap(
projection='mill',
llcrnrlat=self.y_min,
llcrnrlon=self.x_min,
urcrnrlat=self.y_max,
urcrnrlon=self.x_max,
resolution='l'
)
# draw coastlines, country boundaries, fill continents.
the_map.drawcoastlines(linewidth=0.25)
the_map.drawcountries(linewidth=0.25)
......@@ -161,7 +209,7 @@ class Manager:
the_map.drawmeridians(np.arange(0, 360, 30))
the_map.drawparallels(np.arange(-90, 90, 30))
date_ok = False # The date is valide
date_ok = False # The date is ok
query = "SELECT station, lon, lat, {} FROM {} WHERE time={}".format(attr, self.table, date)
for row in execute_query(query):
if getattr(row, "station") is None or getattr(row, attr) is None:
......@@ -203,42 +251,139 @@ class Manager:
def cluster(self):
    """Run a k-means clustering of the measures and draw the result on a map.

    The user is asked for a time period and a number of clusters. The
    (lon, lat) of every row of ``TABLE_TIME`` inside the period is assigned
    to its nearest centroid and the centroids are moved to the mean of their
    rows until they stop changing. Every row counts, so a station weighs as
    much as the number of measures it took during the period.

    The centroids, the stations (coloured by cluster) and the bounding box
    of each cluster are drawn on a map which is saved as a PNG in
    ``DIR_OUT`` and then displayed.

    :return: None
    """
    def random_centroids():
        """Draw nb_cluster random (lon, lat) points inside the map bounds."""
        return [
            (
                random.randint(int(self.x_min * 10000), int(self.x_max * 10000)) / 10000,  # x with 4 decimals
                random.randint(int(self.y_min * 10000), int(self.y_max * 10000)) / 10000,  # y with 4 decimals
            )
            for _ in range(nb_cluster)
        ]

    def nearest(centroids, lon, lat):
        """Return the index of the centroid closest to (lon, lat)."""
        # Squared distances are enough to find the minimum
        distances = [(x - lon) ** 2 + (y - lat) ** 2 for x, y in centroids]
        return distances.index(min(distances))

    self.table = "TABLE_TIME"

    # Ask information from user
    print("=== Choix 3 : CLUSTER ===")
    print("Vous allez devoir choisir une période de temps. Chaque station aura une pondérence suivant le nombre de"
          " mesures prises durant cette péridoe")
    date_begin = date_end = None
    # Ask again until the period is not empty
    while date_begin is None or date_begin >= date_end:
        print("La date de départ :")
        date_begin = ask_d()
        print("Entrez la date de fin :")
        date_end = ask_d()
    print("Entrez le nombre de cluster voulus")
    nb_cluster = ask_int()
    while nb_cluster < 1:
        # Without any centroid there is nothing to assign the rows to
        nb_cluster = ask_int()

    # Initialisation
    query = "SELECT lon, lat FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING" \
            "".format(self.table, date_begin, date_end)
    old_centroids = None
    new_centroids = random_centroids()

    # NOTE(review): if there are fewer distinct positions than clusters, a
    # cluster is always empty and this loop keeps drawing new centroids.
    while old_centroids != new_centroids:
        old_centroids = new_centroids
        # Per cluster: sum of the coordinates and number of rows assigned
        data = [{'x': 0, 'y': 0, 'nb': 0} for _ in range(nb_cluster)]
        # could be parallelized
        for row in execute_query(query):
            i = nearest(old_centroids, row.lon, row.lat)
            data[i]['x'] += row.lon
            data[i]['y'] += row.lat
            data[i]['nb'] += 1
        # end of the part that could be parallelized

        if not any(elt['nb'] for elt in data):
            # No row at all in the period: new random centroids would never
            # help, so stop instead of looping forever.
            print("Aucune mesure sur cette période")
            return
        if any(elt['nb'] == 0 for elt in data):
            # A cluster is empty: start again from new random centroids
            new_centroids = random_centroids()
        else:
            # Move every centroid to the mean of its rows, rounded to the
            # precision of the data so that the loop can converge.
            new_centroids = [
                (
                    float("{0:.4f}".format(elt['x'] / elt['nb'])),
                    float("{0:.4f}".format(elt['y'] / elt['nb'])),
                )
                for elt in data
            ]

    # Configuration of the map
    plt.figure(figsize=(14, 14))
    the_map = Basemap(
        projection='mill',
        llcrnrlat=self.y_min,
        llcrnrlon=self.x_min,
        urcrnrlat=self.y_max,
        urcrnrlon=self.x_max,
        resolution='l'
    )
    # draw coastlines, country boundaries, fill continents.
    the_map.drawcoastlines(linewidth=0.25)
    the_map.drawcountries(linewidth=0.25)
    the_map.fillcontinents(color='coral', lake_color='aqua')
    # draw the edge of the map projection region (the projection limb)
    the_map.drawmapboundary(fill_color='aqua')
    # draw lat/lon grid lines every 30 degrees.
    the_map.drawmeridians(np.arange(0, 360, 30))
    the_map.drawparallels(np.arange(-90, 90, 30))

    # One random colour per cluster
    colors = [generate_color() for _ in range(nb_cluster)]

    # Add centroids
    for i, (lon, lat) in enumerate(old_centroids):
        x, y = the_map(lon, lat)
        plt.annotate("Cluster {}".format(i), (x, y), color=colors[i])
        the_map.plot(x, y, marker='D', color=colors[i])

    # Add all points
    query = "SELECT station, lon, lat FROM {} WHERE time >= {} AND time <= {} ALLOW FILTERING" \
            "".format(self.table, date_begin, date_end)
    stations = set()
    zones = [[] for _ in range(nb_cluster)]
    for row in execute_query(query):
        if row.station in stations:
            # Each station is drawn only once
            continue
        stations.add(row.station)
        # Analyse the point
        i = nearest(old_centroids, row.lon, row.lat)
        zones[i].append((row.lon, row.lat))
        # Add the point
        x, y = the_map(row.lon, row.lat)
        the_map.plot(x, y, marker=".", color=colors[i])
        plt.annotate(row.station, (x, y), color=colors[i])

    # Add zones: the bounding box of the stations of each cluster
    for i, zone in enumerate(zones):
        lats, lons = clear_zone(zone)
        x, y = the_map(lons, lats)
        the_map.plot(x, y, marker=None, color=colors[i])

    title = "{} clusters du {} au {}".format(
        nb_cluster,
        datetime(*date_begin).strftime('%Y-%m-%d %H:%M'),
        datetime(*date_end).strftime('%Y-%m-%d %H:%M')
    )
    plt.title(title)
    # The file name is the title without spaces, colons and dashes
    for elt in ' :-':
        title = title.replace(elt, '_')
    path = os.path.join(DIR_OUT, title.lower() + '.png')
    # Save before show(): the figure may be gone once the window is closed
    plt.savefig(path)
    plt.show()
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment