Thomas Le Gluher / NF26
Commit b21e01be, authored Jun 23, 2019 by Thomas Le Gluher
initial commit
parent e29f03f5
Changes: 5 files
asos.txt (new file, mode 100644): diff collapsed (data file not shown).
loader.py (new file, mode 100644)
import csv
import re
from cassandra.cluster import Cluster

# CREATE KEYSPACE tlegluhe_metar WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 2};
# USE tlegluhe_metar;

cluster = Cluster(['localhost'])
session = cluster.connect('tlegluhe_metar')

csvfilename = 'TP/asos.txt'
limit = 200000


def loadata(filename):
    # Parse the "valid" timestamp column (raw string avoids invalid-escape warnings on \d).
    dateparser = re.compile(r"(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)")
    with open(filename) as f:
        for r in csv.DictReader(f):
            match_date = dateparser.match(r["valid"])
            if not match_date:
                continue
            date = match_date.groupdict()
            try:
                data = {}
                data["station"] = r["station"]
                data["date"] = (int(date["year"]), int(date["month"]), int(date["day"]),
                                int(date["hour"]), int(date["minute"]))
                data["lon"] = float(r["lon"])
                data["lat"] = float(r["lat"])
                # 'M' marks a missing value in the ASOS export.
                data["tmpf"] = float(r["tmpf"]) if (r["tmpf"] != 'M') else None
                data["dwpf"] = float(r["dwpf"]) if (r["dwpf"] != 'M') else None
                data["relh"] = float(r["relh"]) if (r["relh"] != 'M') else None
                data["drct"] = float(r["drct"]) if (r["drct"] != 'M') else None
                data["sknt"] = float(r["sknt"]) if (r["sknt"] != 'M') else None
                data["p01i"] = float(r["p01i"]) if (r["p01i"] != 'M') else None
                data["alti"] = float(r["alti"]) if (r["alti"] != 'M') else None
                data["mslp"] = float(r["mslp"]) if (r["mslp"] != 'M') else None
                data["vsby"] = float(r["vsby"]) if (r["vsby"] != 'M') else None
                data["gust"] = float(r["gust"]) if (r["gust"] != 'M') else None
                data["skyc1"] = r["skyc1"] if (r["skyc1"] != 'M') else None
                data["skyc2"] = r["skyc2"] if (r["skyc2"] != 'M') else None
                data["skyc3"] = r["skyc3"] if (r["skyc3"] != 'M') else None
                data["skyc4"] = r["skyc4"] if (r["skyc4"] != 'M') else None
                data["skyl1"] = float(r["skyl1"]) if (r["skyl1"] != 'M') else None
                data["skyl2"] = float(r["skyl2"]) if (r["skyl2"] != 'M') else None
                data["skyl3"] = float(r["skyl3"]) if (r["skyl3"] != 'M') else None
                data["skyl4"] = float(r["skyl4"]) if (r["skyl4"] != 'M') else None
                data["wxcodes"] = r["wxcodes"] if (r["wxcodes"] != 'M') else None
                data["ice_accretion_1hr"] = float(r["ice_accretion_1hr"]) if (r["ice_accretion_1hr"] != 'M') else None
                data["ice_accretion_3hr"] = float(r["ice_accretion_3hr"]) if (r["ice_accretion_3hr"] != 'M') else None
                data["ice_accretion_6hr"] = float(r["ice_accretion_6hr"]) if (r["ice_accretion_6hr"] != 'M') else None
                data["peak_wind_gust"] = float(r["peak_wind_gust"]) if (r["peak_wind_gust"] != 'M') else None
                data["peak_wind_drct"] = float(r["peak_wind_drct"]) if (r["peak_wind_drct"] != 'M') else None
                data["peak_wind_time"] = r["peak_wind_time"] if (r["peak_wind_time"] != 'M') else None
                data["feel"] = float(r["feel"]) if (r["feel"] != 'M') else None
                data["metar"] = r["metar"] if (r["metar"] != 'M') else None
                yield data
            except TypeError:
                continue


def limiteur(g, limit):
    # Yield at most `limit` items from the generator g.
    for i, d in enumerate(g):
        if i >= limit:
            return None
        yield d


# finlande1: partitioned by (station, annee) -> per-station queries over a year (question1).
create_query_1 = '''CREATE TABLE finlande1(station text, annee int, mois int, jour int, heure int, min int, lat float, lon float, tmpf float, dwpf float, relh float, drct float, sknt float, p01i float, alti float, mslp float, vsby float, gust float, skyc1 text, skyc2 text, skyc3 text, skyc4 text, skyl1 float, skyl2 float, skyl3 float, skyl4 float, wxcodes text, ice_accretion_1hr float, ice_accretion_3hr float, ice_accretion_6hr float, peak_wind_gust float, peak_wind_drct float, peak_wind_time text, feel float, metar text, primary key((station, annee), mois, jour, heure, min));'''
# finlande2: partitioned by the full timestamp -> all stations at a given instant (question2).
create_query_2 = '''CREATE TABLE finlande2(station text, annee int, mois int, jour int, heure int, min int, lat float, lon float, tmpf float, dwpf float, relh float, drct float, sknt float, p01i float, alti float, mslp float, vsby float, gust float, skyc1 text, skyc2 text, skyc3 text, skyc4 text, skyl1 float, skyl2 float, skyl3 float, skyl4 float, wxcodes text, ice_accretion_1hr float, ice_accretion_3hr float, ice_accretion_6hr float, peak_wind_gust float, peak_wind_drct float, peak_wind_time text, feel float, metar text, primary key((annee, mois, jour, heure, min), lat, lon, station));'''
# finlande3: partitioned by annee -> whole-year scans (question3).
create_query_3 = '''CREATE TABLE finlande3(station text, annee int, mois int, jour int, heure int, min int, lat float, lon float, tmpf float, dwpf float, relh float, drct float, sknt float, p01i float, alti float, mslp float, vsby float, gust float, skyc1 text, skyc2 text, skyc3 text, skyc4 text, skyl1 float, skyl2 float, skyl3 float, skyl4 float, wxcodes text, ice_accretion_1hr float, ice_accretion_3hr float, ice_accretion_6hr float, peak_wind_gust float, peak_wind_drct float, peak_wind_time text, feel float, metar text, primary key((annee), mois, jour, heure, min));'''


def writecassandra(csvfilename, limit, session, tablename):
    g = loadata(csvfilename)
    data = limiteur(g, limit)
    for r in data:
        t = (r["station"], r["date"][0], r["date"][1], r["date"][2], r["date"][3], r["date"][4],
             r["lat"], r["lon"], r["tmpf"], r["dwpf"], r["relh"], r["drct"], r["sknt"], r["p01i"],
             r["alti"], r["mslp"], r["vsby"], r["gust"], r["skyc1"], r["skyc2"], r["skyc3"],
             r["skyc4"], r["skyl1"], r["skyl2"], r["skyl3"], r["skyl4"], r["wxcodes"],
             r["ice_accretion_1hr"], r["ice_accretion_3hr"], r["ice_accretion_6hr"],
             r["peak_wind_gust"], r["peak_wind_drct"], r["peak_wind_time"], r["feel"], r["metar"])
        query = """
        INSERT INTO """ + tablename + """(
        station, annee, mois, jour, heure, min, lat, lon, tmpf, dwpf, relh, drct, sknt, p01i,
        alti, mslp, vsby, gust, skyc1, skyc2, skyc3, skyc4, skyl1, skyl2, skyl3, skyl4,
        wxcodes, ice_accretion_1hr, ice_accretion_3hr, ice_accretion_6hr,
        peak_wind_gust, peak_wind_drct, peak_wind_time, feel, metar)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        session.execute(query, t)


session.execute('DROP TABLE IF EXISTS finlande1')
session.execute(create_query_1)
writecassandra(csvfilename, limit, session, 'finlande1')

session.execute('DROP TABLE IF EXISTS finlande2')
session.execute(create_query_2)
writecassandra(csvfilename, limit, session, 'finlande2')

session.execute('DROP TABLE IF EXISTS finlande3')
session.execute(create_query_3)
writecassandra(csvfilename, limit, session, 'finlande3')
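Note: the per-column conversions in loadata() all repeat the same "value unless it is 'M', else None" rule. As a possible refactoring (not part of the original commit; the parse_field helper and the two column lists below are illustrative names), the rule could be factored out once:

# Hypothetical refactoring sketch, not in the original loader.py: factor out the
# repeated "'M' means missing" conversion used in loadata().
FLOAT_COLS = ["tmpf", "dwpf", "relh", "drct", "sknt", "p01i", "alti", "mslp", "vsby", "gust",
              "skyl1", "skyl2", "skyl3", "skyl4", "ice_accretion_1hr", "ice_accretion_3hr",
              "ice_accretion_6hr", "peak_wind_gust", "peak_wind_drct", "feel"]
TEXT_COLS = ["skyc1", "skyc2", "skyc3", "skyc4", "wxcodes", "peak_wind_time", "metar"]

def parse_field(raw, numeric):
    # 'M' marks a missing value in the ASOS export; map it to None, cast to float if numeric.
    if raw == 'M':
        return None
    return float(raw) if numeric else raw

# Inside the csv.DictReader loop the field assignments could then become:
#     for col in FLOAT_COLS:
#         data[col] = parse_field(r[col], numeric=True)
#     for col in TEXT_COLS:
#         data[col] = parse_field(r[col], numeric=False)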
question1.py (new file, mode 100644)
# To run this script, type:
# %run TP/question1 EFKI 2003 2004 tmpf
from cassandra.cluster import Cluster
from pyspark import SparkContext
from functools import reduce
from pyspark.sql.functions import avg
import numpy as np
import sys
import matplotlib.pyplot as plt
import itertools
import config

cluster = Cluster(['localhost'])
session = cluster.connect('tlegluhe_metar')
session.default_fetch_size = 100
sc = SparkContext.getOrCreate()

numericindicators = ['tmpf', 'dwpf', 'relh', 'drct', 'sknt', 'p01i', 'alti', 'mslp', 'vsby',
                     'gust', 'feel', 'ice_accretion_1hr', 'ice_accretion_3hr',
                     'ice_accretion_6hr', 'peak_wind_gust', 'peak_wind_drct']

if __name__ == "__main__":
    # Defaults, overridden by command-line arguments: station, start year, end year, indicator.
    param = "tmpf"
    annee_deb = 2003
    annee_fin = 2012
    point = "EFKI"
    if len(sys.argv) > 1:
        point = sys.argv[1]
    if len(sys.argv) > 2:
        if (int(sys.argv[2]) > 2002 and int(sys.argv[2]) < 2013):
            annee_deb = int(sys.argv[2])
    if len(sys.argv) > 3:
        if (int(sys.argv[3]) > 2002 and int(sys.argv[3]) < 2013 and int(sys.argv[3]) > int(sys.argv[2])):
            annee_fin = int(sys.argv[3])
    if len(sys.argv) > 4:
        if (sys.argv[4] in numericindicators):
            param = sys.argv[4]
        else:
            print('You can\'t choose this indicator for this question')

    # One CQL query per year against finlande1, all chained into a single iterable.
    data = []
    for annee in range(annee_deb, annee_fin + 1):
        requete = """SELECT station, annee, mois, jour, {}
        FROM finlande1 where station = '{}' AND annee = {};
        """.format(param, point, annee)
        data = itertools.chain(data, session.execute(requete))
    D0 = sc.parallelize(data)
    D0 = D0.filter(lambda d: (d[4] is not None))

    if ((annee_fin - annee_deb) <= 2):
        # FIRST RESULT: chart over the chosen period, by month.
        # Mean, min, max and standard deviation per (year, month), accumulated as
        # (count, sum, min, max, sum of squares).
        D = D0.map(lambda d: ((d[1], d[2]), np.array([1, d[4], d[4], d[4], d[4] ** 2])))
        D = D.reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1], min(a[2], b[2]),
                                        max(a[3], b[3]), a[4] + b[4]))
        D = D.map(lambda d: ((d[0][0], d[0][1]), d[1][1] / d[1][0], d[1][2], d[1][3],
                             np.sqrt(-(d[1][1] / d[1][0]) ** 2 + d[1][4] / d[1][0])))
        # Build the data arrays.
        data_month = np.zeros(((annee_fin - annee_deb + 1) * 12))
        data_min = np.zeros(((annee_fin - annee_deb + 1) * 12))
        data_max = np.zeros(((annee_fin - annee_deb + 1) * 12))
        data_sqrt = np.zeros(((annee_fin - annee_deb + 1) * 12))
        legend_month = [None] * ((annee_fin - annee_deb + 1) * 12)
        for row in D.collect():
            data_month[(row[0][0] - annee_deb) * 12 + row[0][1] - 1] = row[1]
            data_min[(row[0][0] - annee_deb) * 12 + row[0][1] - 1] = row[2]
            data_max[(row[0][0] - annee_deb) * 12 + row[0][1] - 1] = row[3]
            data_sqrt[(row[0][0] - annee_deb) * 12 + row[0][1] - 1] = row[4]
            legend_month[(row[0][0] - annee_deb) * 12 + row[0][1] - 1] = str(str(row[0][1]) + "/" + str(row[0][0]))
        plt.clf()
        plt.cla()
        x = np.arange((annee_fin - annee_deb + 1) * 12)
        plt.subplot(221)
        plt.bar(x, height=data_month)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Moyenne ' + param)
        plt.title('Moyenne par mois pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1) * 12)
        plt.subplot(222)
        plt.bar(x, height=data_min)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Min ' + param)
        plt.title('Minimum par mois pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1) * 12)
        plt.subplot(223)
        plt.bar(x, height=data_max)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Max ' + param)
        plt.title('Maximum par mois pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1) * 12)
        plt.subplot(224)
        plt.bar(x, height=data_sqrt)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Ecart type ' + param)
        plt.title('Ecart type par mois pour ' + point)
        plt.subplots_adjust(wspace=0.5, hspace=0.5)
        plt.savefig("data_general.pdf")

    if ((annee_fin - annee_deb) > 2):
        # For longer ranges: the same statistics, aggregated by year instead of by month.
        D = D0.map(lambda d: ((d[1]), np.array([1, d[4], d[4], d[4], d[4] ** 2])))
        D = D.reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1], min(a[2], b[2]),
                                        max(a[3], b[3]), a[4] + b[4]))
        D = D.map(lambda d: ((d[0]), d[1][1] / d[1][0], d[1][2], d[1][3],
                             np.sqrt(-(d[1][1] / d[1][0]) ** 2 + d[1][4] / d[1][0])))
        # Build the data arrays.
        data_month = np.zeros((annee_fin - annee_deb + 1))
        data_min = np.zeros((annee_fin - annee_deb + 1))
        data_max = np.zeros((annee_fin - annee_deb + 1))
        data_sqrt = np.zeros((annee_fin - annee_deb + 1))
        legend_month = [None] * ((annee_fin - annee_deb + 1))
        for row in D.collect():
            data_month[(row[0] - annee_deb)] = row[1]
            data_min[(row[0] - annee_deb)] = row[2]
            data_max[(row[0] - annee_deb)] = row[3]
            data_sqrt[(row[0] - annee_deb)] = row[4]
            legend_month[(row[0] - annee_deb)] = str(row[0])
        plt.clf()
        plt.cla()
        x = np.arange((annee_fin - annee_deb + 1))
        plt.subplot(221)
        plt.bar(x, height=data_month)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Moyenne ' + param)
        plt.title('Moyenne par an pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1))
        plt.subplot(222)
        plt.bar(x, height=data_min)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Min ' + param)
        plt.title('Minimum par an pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1))
        plt.subplot(223)
        plt.bar(x, height=data_max)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Max ' + param)
        plt.title('Maximum par an pour ' + point)
        x = np.arange((annee_fin - annee_deb + 1))
        plt.subplot(224)
        plt.bar(x, height=data_sqrt)
        plt.xticks(x, legend_month, fontsize=5, rotation=90)
        plt.xlabel('')
        plt.ylabel('Ecart type ' + param)
        plt.title('Ecart type par an pour ' + point)
        plt.subplots_adjust(wspace=0.5, hspace=0.5)
        plt.savefig("data_general_an.pdf")

    # Seasonal breakdown over all selected years (season key = (month - 1) // 3).
    D2 = D0.map(lambda d: (((d[2] - 1) // 3), np.array([1, d[4], d[4], d[4], d[4] ** 2])))
    D2 = D2.reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1], min(a[2], b[2]),
                                      max(a[3], b[3]), a[4] + b[4]))
    D2 = D2.map(lambda d: ((d[0]), d[1][1] / d[1][0], d[1][2], d[1][3],
                           np.sqrt(-(d[1][1] / d[1][0]) ** 2 + d[1][4] / d[1][0])))
    data2_moy = np.zeros(4)
    data2_min = np.zeros(4)
    data2_max = np.zeros(4)
    data2_sqrt = np.zeros(4)
    legend_saison = ["Printemps", "Ete", "Automne", "Hiver"]
    for row in D2.collect():
        data2_moy[(row[0])] = row[1]
        data2_min[(row[0])] = row[2]
        data2_max[(row[0])] = row[3]
        data2_sqrt[(row[0])] = row[4]
    plt.clf()
    plt.cla()
    x = np.arange(4)
    plt.subplot(221)
    plt.bar(x, height=data2_moy)
    plt.xticks(x, legend_saison, fontsize=5, rotation=90)
    plt.xlabel('')
    plt.ylabel('Moyenne ' + param)
    plt.title('Moyenne par saison pour ' + point)
    x = np.arange(4)
    plt.subplot(222)
    plt.bar(x, height=data2_min)
    plt.xticks(x, legend_saison, fontsize=5, rotation=90)
    plt.xlabel('')
    plt.ylabel('Min ' + param)
    plt.title('Minimum par saison pour ' + point)
    x = np.arange(4)
    plt.subplot(223)
    plt.bar(x, height=data2_max)
    plt.xticks(x, legend_saison, fontsize=5, rotation=90)
    plt.xlabel('')
    plt.ylabel('Max ' + param)
    plt.title('Maximum par saison pour ' + point)
    x = np.arange(4)
    plt.subplot(224)
    plt.bar(x, height=data2_sqrt)
    plt.xticks(x, legend_saison, fontsize=5, rotation=90)
    plt.xlabel('')
    plt.ylabel('Ecart type ' + param)
    plt.title('Ecart type par saison pour ' + point)
    plt.subplots_adjust(wspace=0.5, hspace=0.5)
    plt.savefig("data_saison.pdf")
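The monthly, yearly and seasonal blocks above all follow the same pattern: map each observation to (count, value, value, value, value squared), combine per key with reduceByKey, then derive mean = sum / count and standard deviation = sqrt(sum_of_squares / count - mean^2). A minimal standalone sketch of that pattern (the sample values are invented for illustration and it assumes a working PySpark environment, not data from finlande1):

# Standalone illustration of the (count, sum, min, max, sum of squares) aggregation
# used in question1.py; the sample values are made up for the example.
import numpy as np
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
sample = [((2003, 1), 10.0), ((2003, 1), 14.0), ((2003, 2), -2.0)]  # ((year, month), tmpf)
acc = sc.parallelize(sample) \
        .map(lambda kv: (kv[0], np.array([1, kv[1], kv[1], kv[1], kv[1] ** 2]))) \
        .reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1],
                                   min(a[2], b[2]), max(a[3], b[3]), a[4] + b[4]))
stats = acc.map(lambda kv: (kv[0],
                            kv[1][1] / kv[1][0],          # mean = sum / count
                            kv[1][2], kv[1][3],           # min, max
                            np.sqrt(kv[1][4] / kv[1][0] - (kv[1][1] / kv[1][0]) ** 2)))  # std
print(stats.collect())  # per key: (mean, min, max, standard deviation)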
question2.py (new file, mode 100644)
# To run this script, type:
# %run TP/question2 tmpf 2010 1 1 0 0
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
from cassandra.cluster import Cluster
from pyspark import SparkContext
from functools import reduce
from pyspark.sql.functions import avg
import sys
import config

plt.cla()
plt.clf()

cluster = Cluster(['localhost'])
session = cluster.connect('tlegluhe_metar')
sc = SparkContext.getOrCreate()

# Numeric indicators (same list as in question1.py); needed for the rounding test below.
numericindicators = ['tmpf', 'dwpf', 'relh', 'drct', 'sknt', 'p01i', 'alti', 'mslp', 'vsby',
                     'gust', 'feel', 'ice_accretion_1hr', 'ice_accretion_3hr',
                     'ice_accretion_6hr', 'peak_wind_gust', 'peak_wind_drct']

if __name__ == "__main__":
    # Defaults, overridden by command-line arguments: indicator, year, month, day, hour, minute.
    param = "tmpf"
    annee = 2003
    mois = 1
    jour = 1
    heure = 0
    minute = 20
    if len(sys.argv) > 1:
        param = sys.argv[1]
    if len(sys.argv) > 2:
        if (int(sys.argv[2]) > 2002 and int(sys.argv[2]) < 2013):
            annee = int(sys.argv[2])
    if len(sys.argv) > 3:
        if (int(sys.argv[3]) > 0 and int(sys.argv[3]) < 13):
            mois = int(sys.argv[3])
    if len(sys.argv) > 4:
        if (int(sys.argv[4]) > 0 and int(sys.argv[4]) < 31):
            jour = int(sys.argv[4])
    if len(sys.argv) > 5:
        if (int(sys.argv[5]) >= 0 and int(sys.argv[5]) < 24):
            heure = int(sys.argv[5])
    if len(sys.argv) > 6:
        # Observations are stored at minutes 00, 20 and 50: round the requested minute
        # to the nearest bucket.
        if (int(sys.argv[6]) >= 0 and int(sys.argv[6]) < 10):
            minute = 0
        elif (int(sys.argv[6]) >= 10 and int(sys.argv[6]) < 35):
            minute = 20
        elif (int(sys.argv[6]) >= 35 and int(sys.argv[6]) < 60):
            minute = 50

    requete = """SELECT lat,lon,station,{}
    FROM finlande2 where annee = {} AND mois = {} AND jour= {} AND heure = {} AND min = {}
    LIMIT 5000
    ;
    """.format(param, annee, mois, jour, heure, minute)
    data = session.execute(requete)

    lats = []
    lons = []
    values = []
    for row in data:
        # Keep only rows inside the Finnish bounding box with a non-missing value.
        if (row[0] is not None and row[0] >= 58 and row[0] <= 72
                and row[1] is not None and row[1] >= 20 and row[1] <= 32
                and row[3] is not None):
            lats.append(row[0])
            lons.append(row[1])
            if (param in numericindicators):
                values.append(round(row[3], 2))
            else:
                values.append(row[3])

    map = Basemap(projection='merc', lat_0=65, lon_0=26, resolution='h', area_thresh=0.1,
                  llcrnrlon=20, llcrnrlat=59, urcrnrlon=32, urcrnrlat=71)
    map.drawcountries()
    map.drawmapboundary(fill_color='#46bcec')
    map.fillcontinents(color='white', lake_color='#46bcec')
    x, y = map(lons, lats)
    map.plot(x, y, 'bo', markersize=3)

    labels = values
    for label, xpt, ypt in zip(labels, x, y):
        plt.text(xpt + 20000, ypt + 20000, label, fontsize=6)

    plt.title('' + param + ' à ' + str(heure) + 'h' + str(minute) + ' le ' + str(jour) + '/' + str(mois) + '/' + str(annee) + '')
    plt.savefig("carte_finlande.pdf")
    print("Done ! Check the file carte_finlande.pdf at the root of this project")
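The query above is answered from a single partition because finlande2 (see loader.py) uses (annee, mois, jour, heure, min) as its partition key, which is exactly the set of equality predicates in the WHERE clause. As an illustrative variant (not in the original script, timestamp values chosen arbitrarily), the same lookup can be issued with driver-bound parameters, as loader.py already does for its INSERT statements:

# Parameterised variant of the finlande2 lookup; %s placeholders are bound by the
# Cassandra driver instead of being interpolated with str.format().
rows = session.execute(
    """SELECT lat, lon, station, tmpf
       FROM finlande2
       WHERE annee = %s AND mois = %s AND jour = %s AND heure = %s AND min = %s
       LIMIT 5000""",
    (2010, 1, 1, 0, 20))
for row in rows:
    print(row.station, row.lat, row.lon, row.tmpf)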
question3.py (new file, mode 100644)
# To run this script, type:
# %run TP/question3 2009 2010 1 3 tmpf
from cassandra.cluster import Cluster
import sys
import numpy as np
import glob
from math import sqrt
import random
from pyspark import SparkContext
from functools import reduce
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import matplotlib.cm as cm
import itertools
import config

plt.cla()
plt.clf()

cluster = Cluster(['localhost'])
session = cluster.connect('tlegluhe_metar')
session.default_fetch_size = 100
sc = SparkContext.getOrCreate()


def getIndicatorCluster():
    # Collect (lat, lon, param) rows from finlande3 between (annee_deb, mois_deb)
    # and (annee_fin, mois_fin).
    data = []
    for annee in range(annee_deb, annee_fin + 1):
        if (annee == annee_deb):
            requete = """SELECT lat,lon, {}
            FROM finlande3 where annee = {} and mois >= {} ;
            """.format(param, annee, mois_deb)
            data = itertools.chain(data, session.execute(requete))
        elif (annee == annee_fin):
            requete = """SELECT lat,lon, {}
            FROM finlande3 where annee = {} and mois <= {} ;
            """.format(param, annee, mois_fin)
            data = itertools.chain(data, session.execute(requete))
        else:
            requete = """SELECT lat,lon, {}
            FROM finlande3 where annee = {};
            """.format(param, annee)
            data = itertools.chain(data, session.execute(requete))
    for dat in data:
        flag = True
        for x in dat:
            if (x == None):
                flag = False