Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Thomas Meurou
NF26 Project
Commits
66bb71c2
Commit
66bb71c2
authored
Jun 23, 2019
by
Tianyang
Browse files
add compare month
parent
ec24abd2
Changes
2
Show whitespace changes
Inline
Side-by-side
.DS_Store
View file @
66bb71c2
No preview for this file type
question3_compare_month.py
0 → 100644
View file @
66bb71c2
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
functools
import
reduce
from
database_pre3
import
connection
import
matplotlib.pyplot
as
plt
import
re
import
folium
import
random
import
datetime
table_variable
=
[
'date'
,
'lon'
,
'lat'
,
'station'
,
'time'
,
'alti'
,
'drct'
,
'dwpf'
,
'feel'
,
'gust'
,
'ice_accretion_1hr'
,
'ice_accretion_3hr'
,
'ice_accretion_6hr'
,
'metar'
,
'mslp'
,
'p01i'
,
'peak_wind_drct'
,
'peak_wind_gust'
,
'peak_wind_time'
,
'relh'
,
'sknt'
,
'skyc1'
,
'skyc2'
,
'skyc3'
,
'skyc4'
,
'skyl1'
,
'skyl2'
,
'skyl3'
,
'skyl4'
,
'tmpf'
,
'vsby'
,
'wxcodes'
]
def
add
(
x
,
y
):
return
x
+
y
def
abs_diff
(
x
,
y
):
longth
=
len
(
x
)
result
=
[]
for
i
in
range
(
longth
):
result
.
append
(
abs
(
x
[
i
]
-
y
[
i
]))
total
=
0
for
each
in
result
:
total
+=
each
return
total
def
diff
(
x
,
y
):
return
x
-
y
#caculate mean reduce
#input [count,mean]
def
reduceFonction
(
x
,
y
):
result
=
[]
for
i
in
range
(
2
):
result
.
append
(
reduce
(
add
,[
x
[
i
],
y
[
i
]]))
return
result
#input [valeur] -> [count,mean]
def
mapFonction1
(
x
):
return
[
1
,
x
]
#input [count,mean] -> [mean]
def
mapFonction2
(
x
):
return
x
[
1
]
/
x
[
0
]
#Map reduce to caculate the means of each month of each station
def
mapReduce_kmeans
(
targetNB
,
start
,
end
):
results
=
dict
()
for
row
in
session
.
execute
(
"select * from caitiany.database_kmeans where date >= '%s' and date <= '%s' ALLOW FILTERING"
%
(
start
,
end
)):
data_target
=
row
[
targetNB
]
if
data_target
==
None
:
continue
#We want to take out the month correspondant and add month into keys
dateparser
=
re
.
compile
(
"(\d+)-(?P<month>\d+)-(\d+) (?P<time>\d+:\d+)"
)
match_month
=
dateparser
.
match
(
str
(
row
[
0
]))
time
=
match_month
.
groupdict
()
month
=
time
[
"month"
]
data_espace
=
(
row
[
1
],
row
[
2
],
row
[
3
],
month
)
if
results
.
get
(
data_espace
)
is
None
:
results
[
data_espace
]
=
mapFonction1
(
data_target
)
else
:
mapresult
=
mapFonction1
(
data_target
)
results
[
data_espace
]
=
reduceFonction
(
mapresult
,
results
[
data_espace
])
for
eachEspace
in
results
:
results
[
eachEspace
]
=
mapFonction2
(
results
[
eachEspace
])
#Now we need to put the data of each station all together
newResult
=
dict
()
for
each
in
results
:
newKey
=
each
[
0
:
3
]
if
newResult
.
get
(
newKey
)
is
None
:
newResult
[
newKey
]
=
[
results
[
each
]]
else
:
newResult
[
newKey
]
=
newResult
[
newKey
]
+
[
results
[
each
]]
max
=
0
for
each
in
newResult
:
if
len
(
newResult
[
each
])
>
max
:
max
=
len
(
newResult
[
each
])
deletList
=
[]
#We want to check if there is a missing data so we check the number of month max
#Then we delete all the station who have the missing data
for
each
in
newResult
:
if
len
(
newResult
[
each
])
<
max
:
deletList
.
append
(
each
)
for
delete
in
deletList
:
newResult
.
pop
(
delete
)
return
newResult
#Caculate the difference of the number of the clusters
def
cluster_nb_diff
(
centre_new
,
centre
):
sum
=
0
for
i
in
range
(
3
):
sum
+=
abs
(
centre_new
[
i
][
0
]
-
centre
[
i
][
0
])
return
sum
/
3
#input [tmpt] -> [tmpt,tmpt,tmpt,mean]
def
map1_kmeans
(
x
):
return
[
x
,
x
,
x
,
x
]
#input [tmpt,tmpt,tmpt,tmpt] and [c1,c2,c3,0] -> [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt]
def
reduceKmeans
(
x
,
y
):
result
=
[]
for
i
in
range
(
3
):
result
.
append
(
reduce
(
abs_diff
,[
x
[
i
],
y
[
i
]]))
result
.
append
(
x
[
3
])
return
result
#input [|tmpt - c1|,|tmpt - c2|,|tmpt - c3|,tmpt] -> [cluster number, min(|tmpt - c|), tmpt]
def
map2_kmeans
(
x
):
min_value
=
10000000000000
index
=
0
for
each
in
range
(
3
):
if
min_value
>
x
[
each
]:
min_value
=
x
[
each
]
index
=
each
return
[
index
,
min_value
,
x
[
3
]]
#Update the new center by means
def
MapnewCentre
(
x
):
result
=
[]
longth
=
len
(
x
[
1
])
for
i
in
range
(
longth
):
if
x
[
0
]
!=
0
:
result
.
append
(
x
[
1
][
i
]
/
x
[
0
])
else
:
result
.
append
(
0
)
return
result
#caculate moyen
def
caculateMoyen
(
x
):
total
=
0
count
=
0
for
i
in
x
:
count
+=
1
total
+=
i
return
total
/
count
#caculate sum of two list
def
caculateSumofList
(
x
,
y
):
if
x
==
[]:
return
y
longth
=
len
(
x
)
result
=
[]
for
i
in
range
(
longth
):
result
.
append
(
x
[
i
]
+
y
[
i
])
return
result
#The main algorithm of Kmeans
def
kmeans
(
targetNB
,
target
,
start
,
end
):
#cluster est pour stocler lat, lon de chaque point de chaque cluster
cluster
=
[[],[],[]]
result
=
mapReduce_kmeans
(
targetNB
,
start
,
end
)
if
len
(
result
)
<
3
:
raise
Exception
(
"We
\'
ve just searched less than 3 station!!"
)
#mettre ramdom 3 point comme le centres init
center1
=
random
.
choice
(
list
(
result
))
center2
=
random
.
choice
(
list
(
result
))
center3
=
random
.
choice
(
list
(
result
))
init_point_values
=
[
result
[
center1
],
result
[
center2
],
result
[
center3
]]
#init_point_values = [result[i] for i in result.keys()][:3]
init_point_keys
=
[
center1
,
center2
,
center3
]
#3centre with [point count, temprature centre]
centre
=
{
0
:[
0
,
0
],
1
:[
0
,
0
],
2
:[
0
,
0
]}
centre_new
=
{
0
:[
0
,
0
],
1
:[
0
,
0
],
2
:[
0
,
0
]}
for
key
in
centre
.
keys
():
centre
[
key
]
=
[
1
,
init_point_values
[
key
]]
centre_new
[
key
]
=
[
1
,
init_point_values
[
key
]]
cluster
[
key
].
append
(
init_point_keys
[
key
])
#init the centre new and result new for mapreduce
result_new
=
dict
()
#When the number of point of cluster don't change,stop
while
True
:
for
eachkey
in
result
:
if
eachkey
in
cluster
[
0
]
or
eachkey
in
cluster
[
1
]
or
eachkey
in
cluster
[
2
]:
continue
#caculate the distance between the data of this lingne and the centre
#Map1_kemeans
result_new
[
eachkey
]
=
map1_kmeans
(
result
[
eachkey
])
centre_values
=
[
centre
[
0
][
1
],
centre
[
1
][
1
],
centre
[
2
][
1
]]
#Reduce
result_new
[
eachkey
]
=
reduceKmeans
(
result_new
[
eachkey
],
centre_values
)
#Map2_kmeans
result_new
[
eachkey
]
=
map2_kmeans
(
result_new
[
eachkey
])
#Put all the distance and points into the clusters
#Result format [cluster number, min(|tmpt - c|),tmpt - c]
for
eachpoint
in
result_new
:
clusterNB
=
result_new
[
eachpoint
][
0
]
centre_new
[
clusterNB
][
0
]
+=
1
centre_new
[
clusterNB
][
1
]
=
caculateSumofList
(
centre_new
[
clusterNB
][
1
],
result_new
[
eachpoint
][
2
])
cluster
[
clusterNB
].
append
(
eachpoint
)
#compare centre_new and centre, if
if
not
cluster_nb_diff
(
centre_new
,
centre
)
>
1
:
break
else
:
#caculate the new centre
print
(
"Cluster number differ: "
,
cluster_nb_diff
(
centre_new
,
centre
))
for
eachculster
in
centre_new
:
centre_new
[
eachculster
][
1
]
=
MapnewCentre
(
centre_new
[
eachculster
])
print
(
"center new: "
,
centre_new
)
print
(
"center old: "
,
centre
)
centre
=
centre_new
centre_new
=
{
0
:[
0
,[]],
1
:[
0
,[]],
2
:[
0
,[]]}
result_new
=
dict
()
cluster
=
[[],[],[]]
createMap
(
cluster
,
result
)
#Create the map of the cluster
def
createMap
(
cluster
,
result
):
mean_lat
=
0
count
=
0
for
each
in
[
cluster
[
0
],
cluster
[
1
],
cluster
[
2
]]:
for
each_pos
in
each
:
mean_lat
+=
each_pos
[
0
]
count
+=
1
mean_lat
=
mean_lat
/
count
mean_lon
=
0
count
=
0
for
each
in
[
cluster
[
0
],
cluster
[
1
],
cluster
[
2
]]:
for
each_pos
in
each
:
mean_lon
+=
each_pos
[
1
]
count
+=
1
mean_lon
=
mean_lon
/
count
m
=
folium
.
Map
(
location
=
[
mean_lon
,
mean_lat
],
zoom_start
=
6
)
color
=
{
0
:
'blue'
,
1
:
'red'
,
2
:
'green'
}
i
=
0
for
each
in
[
cluster
[
0
],
cluster
[
1
],
cluster
[
2
]]:
for
each_pos
in
each
:
label
=
str
(
each_pos
[
2
])
+
"
\n
"
+
""
+
target
+
":"
+
str
(
round
(
caculateMoyen
(
result
[(
each_pos
[
0
],
each_pos
[
1
],
each_pos
[
2
])]),
2
))
folium
.
Marker
([
each_pos
[
1
],
each_pos
[
0
]],
popup
=
label
,
icon
=
folium
.
Icon
(
color
=
color
[
i
])).
add_to
(
m
)
i
+=
1
m
.
save
(
"Projet-NF26/map.html"
)
print
(
"Generate successfully"
)
#Check which number of the indicateur
def
checkNBvariable
(
x
):
i
=
0
for
each
in
table_variable
:
if
x
==
each
:
return
i
i
+=
1
print
(
'Doesn
\'
t exist!!'
)
if
__name__
==
"__main__"
:
session
=
connection
()
start
=
input
(
"Please enter the start time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: "
)
end
=
input
(
"Please enter the end time [form: AAAA-MM-DD (From 2008-1-1 to 2017-12-30)]: "
)
target
=
input
(
"Which indicator do you want to check [tmpf,dwpf,etc]: "
)
targetNB
=
checkNBvariable
(
target
)
#start = '2008-01-01'
#end = '2013-12-12'
#data = session.execute_async("select * from caitiany.database_kmeans where date >= '%s' and date <= '%s' ALLOW FILTERING"%(start,end))
kmeans
(
targetNB
,
target
,
start
,
end
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment