Commit 2b1e9aad authored by Tom Fleurant's avatar Tom Fleurant
Browse files

Initial commit

parents
{
"python.pythonPath": "venv/bin/python",
"python.formatting.provider": "black"
}
\ No newline at end of file
-- Calls per customer, broken down by customer age.
-- Raw call counts are divided by the number of customers of the same age so
-- that age groups of different sizes can be compared.
WITH customers_by_age AS (
    -- One row per age with the number of people of that age.
    SELECT
        age,
        COUNT(*) AS nb_customers
    FROM fbdl.dim_people
    GROUP BY age
)
SELECT
    p.age AS age,
    COUNT(*)::decimal / cba.nb_customers AS nb_calls_againt_nb_customers,
    COUNT(*) AS nb_calls,
    cba.nb_customers AS nb_customers
FROM fbdl.calls AS c
-- LEFT JOIN keeps calls that have no matching person; they fall into the
-- NULL-age group, whose customer count and ratio are NULL.
LEFT JOIN fbdl.dim_people AS p
    ON p.row_num = c.fk_people
LEFT JOIN customers_by_age AS cba
    ON cba.age = p.age
-- cba.nb_customers is constant for a given age; it is listed only so it can be
-- selected without an aggregate.
GROUP BY p.age, cba.nb_customers
ORDER BY p.age;
-- Number of people per age, ignoring people whose age is NULL.
SELECT
    p.age,
    COUNT(*)
FROM fbdl.dim_people AS p
WHERE p.age IS NOT NULL
GROUP BY p.age
ORDER BY p.age;
-- Calls per 10,000 inhabitants for each departement.
-- The population is read from the dim_location row already joined to the
-- call instead of being looked up again with a correlated subquery.
-- NOTE(review): assumes one dim_location row per departementcode (the former
-- scalar subquery required the same, otherwise it would have raised) -- confirm.
SELECT
    l.departementcode AS departement_code,
    -- NULLIF yields NULL instead of a division-by-zero error when a
    -- departement has a population of 0.
    COUNT(*)::decimal / NULLIF(l.population, 0) * 10000 AS nb_calls_against_population,
    l.departementlabel AS departement_name
FROM fbdl.calls AS c
-- LEFT JOIN keeps calls without a known location (NULL departement group).
LEFT JOIN fbdl.dim_location AS l
    ON l.row_num = c.fk_location
GROUP BY l.departementcode, l.departementlabel, l.population
ORDER BY l.departementcode;
-- Calls per customer, broken down by customer gender.
WITH customers_by_gender AS (
    -- One row per gender with the number of people of that gender.
    SELECT
        gender,
        COUNT(*) AS nb_customers
    FROM fbdl.dim_people
    GROUP BY gender
)
SELECT
    p.gender AS gender,
    -- For calls without a matching person (or with a NULL gender) there is no
    -- customer count: the ratio is NULL rather than a division-by-zero error.
    COUNT(*)::decimal / cbg.nb_customers AS calls_against_gender_by_nb_customers,
    COUNT(*) AS calls_against_gender,
    COALESCE(cbg.nb_customers, 0) AS nb_customer_by_gender
FROM fbdl.calls AS c
LEFT JOIN fbdl.dim_people AS p
    ON p.row_num = c.fk_people
LEFT JOIN customers_by_gender AS cbg
    ON cbg.gender = p.gender
-- cbg.nb_customers is constant for a given gender; it is listed only so it
-- can be selected without an aggregate.
GROUP BY p.gender, cbg.nb_customers
-- Explicit ordering makes the output deterministic.
ORDER BY p.gender;
-- Calls per calendar day, broken down by day of week.
-- Call counts are divided by the number of dim_date rows that fall on the
-- same day of week.
WITH dates_by_dow AS (
    -- One row per day of week with the number of dates in dim_date.
    SELECT
        joursemaine,
        COUNT(*) AS nb_dates
    FROM fbdl.dim_date
    GROUP BY joursemaine
)
SELECT
    d.joursemaine AS day_of_week,
    COUNT(*)::decimal / dbd.nb_dates AS nb_calls_dow_by_nb_of_dates,
    COUNT(*) AS nb_calls_against_dow,
    dbd.nb_dates AS nb_dates_dow
FROM fbdl.calls AS c
-- LEFT JOIN keeps calls without a matching date; they fall into the
-- NULL day-of-week group, whose date count and ratio are NULL.
LEFT JOIN fbdl.dim_date AS d
    ON d.row_num = c.fk_date
LEFT JOIN dates_by_dow AS dbd
    ON dbd.joursemaine = d.joursemaine
-- dbd.nb_dates is constant for a given day of week; it is listed only so it
-- can be selected without an aggregate.
GROUP BY d.joursemaine, dbd.nb_dates
ORDER BY d.joursemaine;
-- Disconnections per call, broken down by customer age.
WITH customers_by_age AS (
    -- One row per age with the number of people of that age.
    SELECT
        age,
        COUNT(*) AS nb_customers
    FROM fbdl.dim_people
    GROUP BY age
)
SELECT
    p.age AS age,
    -- Deliberately SUM / COUNT(*) rather than AVG: calls whose disconnection
    -- value is NULL still count in the denominator.
    (SUM(c.disconnection)::decimal / COUNT(*)) AS nb_disconnections_against_nb_calls,
    COUNT(*) AS nb_calls,
    SUM(c.disconnection) AS nb_disconnections,
    cba.nb_customers AS nb_customers_by_age
FROM fbdl.calls AS c
-- LEFT JOIN keeps calls that have no matching person (NULL-age group).
LEFT JOIN fbdl.dim_people AS p
    ON p.row_num = c.fk_people
LEFT JOIN customers_by_age AS cba
    ON cba.age = p.age
-- cba.nb_customers is constant for a given age; it is listed only so it can be
-- selected without an aggregate.
GROUP BY p.age, cba.nb_customers
ORDER BY p.age;
-- Ad-hoc inspection: returns every column of every row of the calls table.
SELECT c.*
FROM fbdl.calls AS c;
"age"|"nb_calls_againt_nb_customers"|"nb_calls"|"nb_customers"
19|13.6666666666666667|41|3
20|10.1666666666666667|122|12
21|9.0769230769230769|118|13
22|8.1000000000000000|81|10
23|8.6250000000000000|69|8
24|11.2307692307692308|146|13
25|7.5000000000000000|45|6
26|10.0000000000000000|80|8
27|11.4444444444444444|103|9
28|12.2857142857142857|172|14
29|10.9090909090909091|120|11
30|9.6111111111111111|173|18
31|12.2000000000000000|183|15
32|9.5000000000000000|114|12
33|10.6666666666666667|64|6
34|8.9285714285714286|125|14
35|10.0000000000000000|80|8
36|11.7647058823529412|200|17
37|10.2500000000000000|123|12
38|13.3000000000000000|133|10
39|10.5000000000000000|84|8
40|13.3333333333333333|160|12
41|9.8181818181818182|108|11
42|9.6363636363636364|106|11
43|11.0000000000000000|154|14
44|8.9375000000000000|143|16
45|11.6666666666666667|105|9
46|11.2142857142857143|157|14
47|8.1666666666666667|49|6
48|8.7142857142857143|61|7
49|9.1250000000000000|73|8
50|10.5000000000000000|63|6
51|8.1111111111111111|73|9
52|9.5714285714285714|67|7
53|9.2857142857142857|130|14
54|9.6250000000000000|77|8
55|10.0769230769230769|131|13
56|8.8333333333333333|53|6
57|9.4000000000000000|47|5
58|10.0000000000000000|70|7
59|10.1111111111111111|91|9
60|10.5000000000000000|63|6
61|8.2857142857142857|58|7
62|13.6666666666666667|41|3
63|8.8333333333333333|53|6
64|8.0000000000000000|24|3
65|10.7500000000000000|43|4
66|7.5000000000000000|15|2
67|10.5000000000000000|21|2
68|9.5000000000000000|19|2
69|18.0000000000000000|18|1
70|11.0000000000000000|11|1
71|11.0000000000000000|11|1
72|18.0000000000000000|18|1
75|12.0000000000000000|24|2
76|9.3333333333333333|28|3
77|13.0000000000000000|13|1
78|8.0000000000000000|8|1
80|5.0000000000000000|5|1
82|6.7500000000000000|27|4
84|10.0000000000000000|10|1
85|5.0000000000000000|5|1
88|16.0000000000000000|16|1
89|12.0000000000000000|12|1
"day_of_week"|"nb_calls_dow_by_nb_of_dates"|"nb_calls_against_dow"|"nb_dates_dow"
1|10.4905660377358491|556|53
2|13.1346153846153846|683|52
3|15.1153846153846154|786|52
4|3.0000000000000000|144|48
5|17.0576923076923077|887|52
6|16.3076923076923077|848|52
7|17.9423076923076923|933|52
"departement_code"|"nb_calls_against_population"|"departement_nom"
"01"|1.28452636868173590000|Ain
"02"|0.941483198146002317500000|Aisne
"03"|1.82016745540589730000|Allier
"04"|3.24565462091444590000|Alpes-de-Haute-Provence
"05"|3.79039135790770400000|Hautes-Alpes
"06"|0.420451545403877932160000|Alpes-Maritimes
"07"|1.86743265358784740000|Ardèche
"08"|1.93872298322670360000|Ardennes
"09"|4.48072587759216990000|Ariège
"10"|1.52627178255272270000|Aube
"11"|1.43924958778014520000|Aude
"12"|2.05199097123972650000|Aveyron
"13"|0.241796645796929504990000|Bouches-du-Rhône
"14"|0.783831367725441960300000|Calvados
"15"|2.73048812237666770000|Cantal
"16"|0.989796610936404153750000|Charente
"17"|1.07046601529385160000|Charente-Maritime
"18"|1.50708483657356570000|Cher
"19"|1.51477933349709330000|Corrèze
"20"|1.57156808818742070000|Corse
"21"|1.05515182675558080000|Côte-d'Or
"22"|1.04398725273869710000|Côtes-d'Armor
"23"|3.51676579868809350000|Creuse
"24"|1.01887655192294310000|Dordogne
"25"|1.16432733898808310000|Doubs
"26"|1.13149277175794040000|Drôme
"27"|0.753973169326217262750000|Eure
"28"|1.45448649473523580000|Eure-et-Loir
"29"|0.710786581251932098450000|Finistère
"30"|0.830463273153472746700000|Gard
"31"|0.430965024377630291970000|Haute-Garonne
"32"|2.72264574489365010000|Gers
"33"|0.425732487950250117840000|Gironde
"34"|0.603373185218015187450000|Hérault
"35"|0.548381825954799907780000|Ille-et-Vilaine
"36"|2.75122658852071550000|Indre
"37"|0.809814957282261003360000|Indre-et-Loire
"38"|0.429940712967101474070000|Isère
"39"|2.32893762265101820000|Jura
"40"|1.40772783928440500000|Landes
"41"|1.50587136170722790000|Loir-et-Cher
"42"|0.591071330219462098220000|Loire
"43"|2.84257629107981220000|Haute-Loire
"44"|0.437681455436733145620000|Loire-Atlantique
"45"|0.989217528934612721340000|Loiret
"46"|4.06477335899719650000|Lot
"47"|1.47824309235874130000|Lot-et-Garonne
"48"|5.63907365399156700000|Lozère
"49"|0.647180801183416322160000|Maine-et-Loire
"50"|1.08217083469439700000|Manche
"51"|0.720286879974481264820000|Marne
"52"|2.59304379308489040000|Haute-Marne
"53"|1.86079872247709530000|Mayenne
"54"|0.671236554720847785470000|Meurthe-et-Moselle
"55"|2.69553888314838940000|Meuse
"56"|0.745639870855174367880000|Morbihan
"57"|0.534574564560379242470000|Moselle
"58"|2.09817758289942450000|Nièvre
"59"|0.180952897960660839980000|Nord
"60"|0.494709776606823696850000|Oise
"61"|1.52047518154804210000|Orne
"62"|0.292560990461831336550000|Pas-de-Calais
"63"|0.821569584527428342220000|Puy-de-Dôme
"64"|0.998770867464716808190000|Pyrénées-Atlantiques
"65"|2.41567774858833830000|Hautes-Pyrénées
"66"|1.52189153653662390000|Pyrénées-Orientales
"67"|0.613625751571696895720000|Bas-Rhin
"68"|0.763194593807022777890000|Haut-Rhin
"69"|0.298907679249642089190000|Rhône
"70"|1.58928653581540860000|Haute-Saône
"71"|0.955457965157632870580000|Saône-et-Loire
"72"|0.732785942234484173660000|Sarthe
"73"|1.45277570959013570000|Savoie
"74"|0.739337368883138487130000|Haute-Savoie
"75"|0.242101778656586541840000|Paris
"76"|0.443570680611683968560000|Seine-Maritime
"77"|0.420152144505975222560000|Seine-et-Marne
"78"|0.283395014137051282140000|Yvelines
"79"|1.08675358837675240000|Deux-Sèvres
"80"|0.825091198910177412160000|Somme
"81"|1.15238459289906240000|Tarn
"82"|2.34545778645075950000|Tarn-et-Garonne
"83"|0.629371008784066062250000|Var
"84"|1.09661791286195730000|Vaucluse
"85"|0.964478610007715828880000|Vendée
"86"|1.21336840776944060000|Vienne
"87"|0.987816924596641422460000|Haute-Vienne
"88"|1.31467838415913680000|Vosges
"89"|1.15922204608487240000|Yonne
"90"|3.37795042857746060000|Territoire de Belfort
"91"|0.408920380591768693750000|Essonne
"92"|0.367314040336625991830000|Hauts-de-Seine
"93"|0.438456202897404842430000|Seine-Saint-Denis
"94"|0.371931765074960421610000|Val-de-Marne
"95"|0.402487911946377877120000|Val-d'Oise
"age"|"nb_disconnections_against_nb_calls"|"nb_calls"|"nb_disconnections"|"nb_customers_by_age"
19|1.5121951219512195|41|62|3
20|1.4672131147540984|122|179|12
21|1.5084745762711864|118|178|13
22|1.6049382716049383|81|130|10
23|1.5217391304347826|69|105|8
24|1.4726027397260274|146|215|13
25|1.6000000000000000|45|72|6
26|1.6125000000000000|80|129|8
27|1.4757281553398058|103|152|9
28|1.5581395348837209|172|268|14
29|1.5250000000000000|120|183|11
30|1.4913294797687861|173|258|18
31|1.4972677595628415|183|274|15
32|1.5263157894736842|114|174|12
33|1.6250000000000000|64|104|6
34|1.5440000000000000|125|193|14
35|1.5500000000000000|80|124|8
36|1.4750000000000000|200|295|17
37|1.5447154471544715|123|190|12
38|1.5263157894736842|133|203|10
39|1.5000000000000000|84|126|8
40|1.5187500000000000|160|243|12
41|1.5462962962962963|108|167|11
42|1.4433962264150943|106|153|11
43|1.6233766233766234|154|250|14
44|1.5734265734265734|143|225|16
45|1.5238095238095238|105|160|9
46|1.4840764331210191|157|233|14
47|1.5306122448979592|49|75|6
48|1.6885245901639344|61|103|7
49|1.6027397260273973|73|117|8
50|1.5555555555555556|63|98|6
51|1.5479452054794521|73|113|9
52|1.4029850746268657|67|94|7
53|1.4538461538461538|130|189|14
54|1.6493506493506494|77|127|8
55|1.5725190839694656|131|206|13
56|1.6981132075471698|53|90|6
57|1.6170212765957447|47|76|5
58|1.5142857142857143|70|106|7
59|1.5274725274725275|91|139|9
60|1.4444444444444444|63|91|6
61|1.6724137931034483|58|97|7
62|1.3170731707317073|41|54|3
63|1.6226415094339623|53|86|6
64|1.8333333333333333|24|44|3
65|1.5348837209302326|43|66|4
66|1.6666666666666667|15|25|2
67|1.6190476190476190|21|34|2
68|1.3157894736842105|19|25|2
69|1.3333333333333333|18|24|1
70|1.6363636363636364|11|18|1
71|1.4545454545454545|11|16|1
72|1.6111111111111111|18|29|1
75|1.5000000000000000|24|36|2
76|1.6428571428571429|28|46|3
77|1.5384615384615385|13|20|1
78|1.2500000000000000|8|10|1
80|1.2000000000000000|5|6|1
82|1.6296296296296296|27|44|4
84|1.8000000000000000|10|18|1
85|1.8000000000000000|5|9|1
88|1.3750000000000000|16|22|1
89|1.3333333333333333|12|16|1
"gender"|"calls_against_gender_by_nb_customers"|"calls_against_gender"|"nb_customer_by_gender"
F|13.9068627450980392|2837|204
M|6.9617486338797814|1274|183
U|8.3448275862068966|726|87
import matplotlib.pyplot as plt
import psycopg2
import sys
def bdd():
    """Open the PostgreSQL connection used by this script.

    Returns:
        tuple: ``(conn, cur)`` -- the psycopg2 connection and a cursor
        created on it.

    Exits the process with status 1 when the connection or the cursor
    cannot be created.
    """
    try:
        conn = psycopg2.connect(
            host="tuxa.sme.utc",
            dbname="dbbdd1p006",
            user="bdd1p006",
            password="NOP",
        )
        cur = conn.cursor()
        return conn, cur
    except psycopg2.Error as exc:
        # Catch database errors only: a bare ``except`` would also swallow
        # KeyboardInterrupt and unrelated programming errors.
        print(f"Connexion impossible: {exc}", file=sys.stderr)
        # Non-zero status so callers and shells can detect the failure.
        sys.exit(1)
if __name__ == "__main__":
    # Plot, for each customer age, the number of disconnections per call.
    conn, cur = bdd()

    # Triple-quoted so that line breaks separate the SQL tokens; backslash
    # continuations join lines with no whitespace in between.
    sql = """
        select
            P.age as AGE,
            (sum(C.disconnection)::decimal/count(*)) as NB_Disconnections_Against_NB_CALLS,
            count(*) NB_CALLS,
            sum(C.disconnection) as NB_Disconnections,
            (select count(*) from fbdl.dim_people where age = P.age group by age) NB_Customers_BY_age
        from fbdl.calls C
        left join fbdl.dim_people P on P.row_num = C.fk_people
        group by P.age
        order by P.age
        ;
    """

    try:
        cur.execute(sql)
        rows = cur.fetchall()
    finally:
        # Release the database resources even when the query fails, and
        # before the (blocking) plot window is shown.
        cur.close()
        conn.close()

    # Column 0 is the age, column 1 the disconnections-per-call ratio.
    x = [row[0] for row in rows]
    y = [row[1] for row in rows]
    print(x)
    print(y)

    plt.bar(x, y)
    plt.ylabel("Nombre de déconnexions")
    plt.xlabel("Age")
    plt.title("Nombre moyen de déconnexions en fonction de l'âge du client")
    plt.show()
# Analyse
## Question 1 : Âge des clients et nombre d'appels
Soit la question suivante exprimée dans l'expression de besoins et ici reformulée.
*L'âge des clients a-t-il un impact sur leur nombre d'appels par notre service?*
### SQL
Traduite en SQL, on obtient la requête suivante:
```
SELECT
P.Age as AGE,
Count(*)::decimal/ (select count(*) from fbdl.dim_people where age = P.age group by age) as NB_CALLS_AGAINT_NB_CUSTOMERS,
Count(*) as NB_CALLS,
(select count(*) from fbdl.dim_people where age = P.age group by age) as NB_CUSTOMERS
FROM fbdl.Calls C
LEFT JOIN fbdl.dim_people P ON P.row_num = C.FK_People
GROUP BY P.Age ORDER BY P.AGE;
```
#### Remarque
Comme indiqué dans la requête, on est ici obligé de prendre en compte le nombre de personnes par âge avant de pouvoir comparer les nombres d'appels, la population d'utilisateurs dans la tranche 20-60 ans étant logiquement plus élevée que dans la tranche 80-90 ans.
### Résultats
En exécutant cette requête, on obtient les résultats suivants:
| age | nb_calls_againt_nb_customers | nb_calls |nb_customers |
|--- | --- | --- | --- |
|19|13.6666666666666667|41|3
|20|10.1666666666666667|122|12
|21|9.0769230769230769|118|13
|22|8.1000000000000000|81|10
|23|8.6250000000000000|69|8
|24|11.2307692307692308|146|13
|25|7.5000000000000000|45|6
|26|10.0000000000000000|80|8
|27|11.4444444444444444|103|9
|28|12.2857142857142857|172|14
|29|10.9090909090909091|120|11
|30|9.6111111111111111|173|18
|31|12.2000000000000000|183|15
|32|9.5000000000000000|114|12
|33|10.6666666666666667|64|6
|34|8.9285714285714286|125|14
|35|10.0000000000000000|80|8
|36|11.7647058823529412|200|17
|37|10.2500000000000000|123|12
|38|13.3000000000000000|133|10
|39|10.5000000000000000|84|8
|40|13.3333333333333333|160|12
|41|9.8181818181818182|108|11
|42|9.6363636363636364|106|11
|43|11.0000000000000000|154|14
|44|8.9375000000000000|143|16
|45|11.6666666666666667|105|9
|46|11.2142857142857143|157|14
|47|8.1666666666666667|49|6
|48|8.7142857142857143|61|7
|49|9.1250000000000000|73|8
|50|10.5000000000000000|63|6
|51|8.1111111111111111|73|9
|52|9.5714285714285714|67|7
|53|9.2857142857142857|130|14
|54|9.6250000000000000|77|8
|55|10.0769230769230769|131|13
|56|8.8333333333333333|53|6
|57|9.4000000000000000|47|5
|58|10.0000000000000000|70|7
|59|10.1111111111111111|91|9
|60|10.5000000000000000|63|6
|61|8.2857142857142857|58|7
|62|13.6666666666666667|41|3
|63|8.8333333333333333|53|6
|64|8.0000000000000000|24|3
|65|10.7500000000000000|43|4
|66|7.5000000000000000|15|2
|67|10.5000000000000000|21|2
|68|9.5000000000000000|19|2
|69|18.0000000000000000|18|1
|70|11.0000000000000000|11|1
|71|11.0000000000000000|11|1
|72|18.0000000000000000|18|1
|75|12.0000000000000000|24|2
|76|9.3333333333333333|28|3
|77|13.0000000000000000|13|1
|78|8.0000000000000000|8|1
|80|5.0000000000000000|5|1
|82|6.7500000000000000|27|4
|84|10.0000000000000000|10|1
|85|5.0000000000000000|5|1
|88|16.0000000000000000|16|1
|89|12.0000000000000000|12|1
Cette requête a été exécutée par le script plot.py, qui a dessiné un graphe des résultats:
![](./NB_Calls_Againts_NB_Customers_By_Age.png)
À première vue, d'après les résultats et le graphique, on constate un pic du nombre d'appels par personne autour de 70 et 90 ans. Cependant, si on regarde l'extraction de données effectuée, on peut remarquer qu'il n'y a que très peu d'utilisateurs de plus de 65 ans, ce qui fausse complètement toute moyenne effectuée sur ces données.
En prenant en compte ce fait, l'âge ne semble pas être un facteur décisif du nombre d'appels, même si on peut remarquer de légers pics vers les utilisateurs de 40 ans, qui pourraient avoir une explication statistique.
Le graphique ci-dessous a pour seul intérêt de montrer la répartition des appels sans la rapporter au nombre d'utilisateurs par âge. Aucune conclusion ne doit être tirée de celui-ci.
![](./NB_Calls_Against_NB_Customers.png)
Pour information supplémentaire, la répartition des utilisateurs par âge:
![](./Repartition_Population.png)
## Question 2 : Répartition géographique des appels
Dans le cas où l'on chercherait la région la plus active en nombre d'appels, on pourrait avoir la question suivante:
*Quelle est la répartition géographique des appels?*
### SQL
Il est à noter qu'il faut ramener le nombre d'appels pour un département à la population de celui-ci.
Cette demande peut se traduire en SQL de la façon suivante:
```
select
L.departementcode as Departement_Code,
count(*)::decimal/(select population from fbdl.dim_location where departementcode = L.departementcode) * 10000 as NB_CALLS_AGAINST_POPULATION,
L.departementlabel as Departement_Name
from fbdl.calls C
left join fbdl.dim_location L on L.row_num = C.fk_location
group by L.departementcode, L.departementlabel
order by L.departementcode;
```
Les taux qu'on obtiendra seront pour 10 000 habitants.
### Résultats
On obtient les résultats bruts suivants:
|departement_code|nb_calls_against_population|departement_nom
|--- | ---- | ---
|01|1.28452636868173590000|Ain
|02|0.941483198146002317500000|Aisne
|03|1.82016745540589730000|Allier
|04|3.24565462091444590000|Alpes-de-Haute-Provence
|05|3.79039135790770400000|Hautes-Alpes
|06|0.420451545403877932160000|Alpes-Maritimes
|07|1.86743265358784740000|Ardèche
|08|1.93872298322670360000|Ardennes
|09|4.48072587759216990000|Ariège
|10|1.52627178255272270000|Aube
|11|1.43924958778014520000|Aude
|12|2.05199097123972650000|Aveyron
|13|0.241796645796929504990000|Bouches-du-Rhône
|14|0.783831367725441960300000|Calvados
|15|2.73048812237666770000|Cantal
|16|0.989796610936404153750000|Charente
|17|1.07046601529385160000|Charente-Maritime
|18|1.50708483657356570000|Cher
|19|1.51477933349709330000|Corrèze
|20|1.57156808818742070000|Corse
|21|1.05515182675558080000|Côte-d'Or
|22|1.04398725273869710000|Côtes-d'Armor
|23|3.51676579868809350000|Creuse
|24|1.01887655192294310000|Dordogne
|25|1.16432733898808310000|Doubs
|26|1.13149277175794040000|Drôme
|27|0.753973169326217262750000|Eure
|28|1.45448649473523580000|Eure-et-Loir
|29|0.710786581251932098450000|Finistère
|30|0.830463273153472746700000|Gard
|31|0.430965024377630291970000|Haute-Garonne
|32|2.72264574489365010000|Gers
|33|0.425732487950250117840000|Gironde
|34|0.603373185218015187450000|Hérault
|35|0.548381825954799907780000|Ille-et-Vilaine
|36|2.75122658852071550000|Indre
|37|0.809814957282261003360000|Indre-et-Loire
|38|0.429940712967101474070000|Isère
|39|2.32893762265101820000|Jura
|40|1.40772783928440500000|Landes
|41|1.50587136170722790000|Loir-et-Cher
|42|0.591071330219462098220000|Loire
|43|2.84257629107981220000|Haute-Loire
|44|0.437681455436733145620000|Loire-Atlantique
|45|0.989217528934612721340000|Loiret
|46|4.06477335899719650000|Lot
|47|1.47824309235874130000|Lot-et-Garonne
|48|5.63907365399156700000|Lozère
|49|0.647180801183416322160000|Maine-et-Loire
|50|1.08217083469439700000|Manche
|51|0.720286879974481264820000|Marne
|52|2.59304379308489040000|Haute-Marne
|53|1.86079872247709530000|Mayenne
|54|0.671236554720847785470000|Meurthe-et-Moselle
|55|2.69553888314838940000|Meuse
|56|0.745639870855174367880000|Morbihan
|57|0.534574564560379242470000|Moselle
|58|2.09817758289942450000|Nièvre
|59|0.180952897960660839980000|Nord
|60|0.494709776606823696850000|Oise
|61|1.52047518154804210000|Orne
|62|0.292560990461831336550000|Pas-de-Calais
|63|0.821569584527428342220000|Puy-de-Dôme
|64|0.998770867464716808190000|Pyrénées-Atlantiques
|65|2.41567774858833830000|Hautes-Pyrénées
|66|1.52189153653662390000|Pyrénées-Orientales
|67|0.613625751571696895720000|Bas-Rhin
|68|0.763194593807022777890000|Haut-Rhin
|69|0.298907679249642089190000|Rhône
|70|1.58928653581540860000|Haute-Saône
|71|0.955457965157632870580000|Saône-et-Loire
|72|0.732785942234484173660000|Sarthe
|73|1.45277570959013570000|Savoie
|74|0.739337368883138487130000|Haute-Savoie
|75|0.242101778656586541840000|Paris
|76|0.443570680611683968560000|Seine-Maritime
|77|0.420152144505975222560000|Seine-et-Marne
|78|0.283395014137051282140000|Yvelines
|79|1.08675358837675240000|Deux-Sèvres
|80|0.825091198910177412160000|Somme
|81|1.15238459289906240000|Tarn
|82|2.34545778645075950000|Tarn-et-Garonne
|83|0.629371008784066062250000|Var
|84|1.09661791286195730000|Vaucluse
|85|0.964478610007715828880000|Vendée
|86|1.21336840776944060000|Vienne