Commit d9dbe1a5 authored by sim-baz's avatar sim-baz
Browse files

Add documentation

parent bba8b7f3
...@@ -19,4 +19,4 @@ Objective 3 ...@@ -19,4 +19,4 @@ Objective 3
Data for Finland from 2001 to 2010 Data for Finland from 2001 to 2010
### Projet.pdf ### Projet.pdf
Project intruction file Project instruction file
\ No newline at end of file \ No newline at end of file
...@@ -13,10 +13,28 @@ numeric_columns = loading.numeric_columns ...@@ -13,10 +13,28 @@ numeric_columns = loading.numeric_columns
MIN_DATE = loading.MIN_DATE MIN_DATE = loading.MIN_DATE
MAX_DATE = loading.MAX_DATE MAX_DATE = loading.MAX_DATE
'''
name: getHistory
description: Return the data from the database
parameters:
* station: station id to restrict on
* indicator: indicator to select
return: result of the query (year, month, day and the selected indicator for the station)
'''
def getHistory(station, indicator): def getHistory(station, indicator):
datas = session.execute(f"SELECT year, month, day, {indicator} FROM {table_name_space} where station = '{station}'") datas = session.execute(f"SELECT year, month, day, {indicator} FROM {table_name_space} where station = '{station}'")
return datas return datas
'''
name: getMeanByDay
description: Compute the mean of values by day
parameters:
* table: list with values
* dateMin: beginning year of the period to compute
* dateMax: ending year of the period to compute
return: dictionary of mean values keyed by date
'''
def getMeanByDay(table, dateMin, dateMax): def getMeanByDay(table, dateMin, dateMax):
# Dictionary to store sum of measures and number of measures by day # Dictionary to store sum of measures and number of measures by day
table_date = {} table_date = {}
...@@ -36,6 +54,13 @@ def getMeanByDay(table, dateMin, dateMax): ...@@ -36,6 +54,13 @@ def getMeanByDay(table, dateMin, dateMax):
return table_date return table_date
'''
name: getMeanByMonth
description: Compute the mean of values by month for the entire period
parameters:
* table: list with values
return: dictionary with mean by month
'''
def getMeanByMonth(table): def getMeanByMonth(table):
# Dictionary to store sum of measures and number of measures by month # Dictionary to store sum of measures and number of measures by month
table_month = {} table_month = {}
...@@ -53,7 +78,14 @@ def getMeanByMonth(table): ...@@ -53,7 +78,14 @@ def getMeanByMonth(table):
return table_month return table_month
# Verify the validity of the years given '''
name: verifyYearValidity
description: Verify the validity of the years given
parameters:
* dateMin: first year
* dateMax: last year
return: boolean indicating the validity
'''
def verifyYearValidity(dateMin, dateMax): def verifyYearValidity(dateMin, dateMax):
# Verification to ensure the validity of parameters, dates not equal # Verification to ensure the validity of parameters, dates not equal
if dateMin == dateMax: if dateMin == dateMax:
...@@ -66,6 +98,15 @@ def verifyYearValidity(dateMin, dateMax): ...@@ -66,6 +98,15 @@ def verifyYearValidity(dateMin, dateMax):
return False return False
return True return True
'''
name: plotHistory
description: Plot the curves of evolution and seasonality and save them
parameters:
* station: station to analyse
* indicator: indicator to analyse evolution
* dateMin: first year of the period to plot
* dateMax: last year of the period to plot
'''
def plotHistory(station, indicator, dateMin, dateMax): def plotHistory(station, indicator, dateMin, dateMax):
dateMax = dateMax + 1 dateMax = dateMax + 1
# Accept only indicator with numeric values (not factors) # Accept only indicator with numeric values (not factors)
......
...@@ -10,6 +10,15 @@ import history as h ...@@ -10,6 +10,15 @@ import history as h
colours = ['blue', 'red', 'green', 'orange', 'pink', 'white', 'purple', 'gray'] colours = ['blue', 'red', 'green', 'orange', 'pink', 'white', 'purple', 'gray']
'''
name: getDatasForPeriod
description: Query the database and get the values of the given indicators over a period
parameters:
* startPeriod: beginning date of period to select
* endPeriod: ending date of period to select
* indicators: list of indicators to select
return: result of the query
'''
def getDatasForPeriod(startPeriod, endPeriod, indicators): def getDatasForPeriod(startPeriod, endPeriod, indicators):
datas = [] datas = []
for i in range(int(startPeriod[0:4]), int(endPeriod[0:4]) + 1): for i in range(int(startPeriod[0:4]), int(endPeriod[0:4]) + 1):
...@@ -17,6 +26,17 @@ def getDatasForPeriod(startPeriod, endPeriod, indicators): ...@@ -17,6 +26,17 @@ def getDatasForPeriod(startPeriod, endPeriod, indicators):
return datas return datas
'''
name: verifyDateInPeriod
description: Verify that the date given is within the period of study
parameters:
* startPeriod: beginning date of period to select
* endPeriod: ending date of period to select
* year: year given by user
* month: month given by user
* day: day given by user
return: boolean indicating the validity
'''
def verifyDateInPeriod(startPeriod, endPeriod, year, month, day): def verifyDateInPeriod(startPeriod, endPeriod, year, month, day):
isDate = year.isdigit() and month.isdigit() and day.isdigit() isDate = year.isdigit() and month.isdigit() and day.isdigit()
if isDate: if isDate:
...@@ -27,6 +47,17 @@ def verifyDateInPeriod(startPeriod, endPeriod, year, month, day): ...@@ -27,6 +47,17 @@ def verifyDateInPeriod(startPeriod, endPeriod, year, month, day):
return True return True
return False return False
'''
name: getDecileForAllStations
description: Compute the deciles of a list for different stations and indicators
parameters:
* startPeriod: beginning date of period to select
* endPeriod: ending date of period to select
* table: list of lists with all values
* nb_indicators: number of indicators to compute
* indicators_list: list of names of indicators to compute
return: a dictionary with lists of dictionaries of lists containing the deciles for indicators for stations
'''
def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list): def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indicators_list):
# map with station and list of maps # map with station and list of maps
# the list of maps is used for all indicators # the list of maps is used for all indicators
...@@ -66,6 +97,17 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica ...@@ -66,6 +97,17 @@ def getDecileForAllStations(startPeriod, endPeriod, table, nb_indicators, indica
return deciles return deciles
'''
name: applyKmeans
description: Apply the k-means algorithm to cluster the space
parameters:
* deciles: a dictionary with lists of dictionaries of lists containing the deciles for indicators for stations
* nb_indicators: number of indicators in the deciles
* indicators_list: list of names of indicators in the deciles
* startPeriod: beginning date of period to select
* endPeriod: ending date of period to select
return: a dictionary with the station and its associated cluster
'''
def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod): def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod):
# Create table without map # Create table without map
table = [] table = []
...@@ -95,6 +137,14 @@ def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod) ...@@ -95,6 +137,14 @@ def applyKmeans(deciles, nb_indicators, indicators_list, startPeriod, endPeriod)
return res return res
'''
name: kmeans
description: Cluster the space for a period based on deciles and create a map of the country
parameters:
* startPeriod: beginning date of period to select
* endPeriod: ending date of period to select
* indicators_list: list of names of indicators to take into account
'''
def kmeans(startPeriod, endPeriod, indicators_list): def kmeans(startPeriod, endPeriod, indicators_list):
startDate = datetime.strptime(startPeriod, "%Y-%m-%d") startDate = datetime.strptime(startPeriod, "%Y-%m-%d")
endDate = datetime.strptime(endPeriod, "%Y-%m-%d") endDate = datetime.strptime(endPeriod, "%Y-%m-%d")
......
...@@ -50,6 +50,13 @@ LAST_DAY = "2010-12-31" ...@@ -50,6 +50,13 @@ LAST_DAY = "2010-12-31"
# peak_wind_time: Peak Wind Gust Time (from PK WND METAR remark) # peak_wind_time: Peak Wind Gust Time (from PK WND METAR remark)
# metar: unprocessed reported observation in METAR format # metar: unprocessed reported observation in METAR format
'''
name: loadata
description: Load the data from the file by creating a generator
parameters:
* filename: File name to load
return: generator of dictionaries with value and name of data
'''
def loadata(filename): def loadata(filename):
dateparser = re.compile( dateparser = re.compile(
"(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)" "(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)"
...@@ -223,12 +230,25 @@ NAME_COLUMNS = """ ...@@ -223,12 +230,25 @@ NAME_COLUMNS = """
metar metar
""" """
'''
name: dropTableQuery
description: Create a cql query to drop the table
parameters:
* table: Table name to drop
return: query to drop table
'''
def dropTableQuery(table): def dropTableQuery(table):
return f""" return f"""
DROP TABLE IF EXISTS {table} DROP TABLE IF EXISTS {table}
""" """
'''
name: createTableQuery
description: Create a cql query to create a table with attributes, partitioned by station
parameters:
* table: Table name to create
return: query to create table
'''
def createTableQuery(table): def createTableQuery(table):
query = f"""CREATE TABLE {table}( query = f"""CREATE TABLE {table}(
station varchar, station varchar,
...@@ -270,6 +290,14 @@ def createTableQuery(table): ...@@ -270,6 +290,14 @@ def createTableQuery(table):
)""" )"""
return query return query
'''
name: createTableQueryPartitionningByDate
description: Create a cql query to create a table with attributes, partitioned by date
parameters:
* table: Table name to create
return: query to create table
'''
def createTableQueryPartitionningByDate(table): def createTableQueryPartitionningByDate(table):
query = f"""CREATE TABLE {table}( query = f"""CREATE TABLE {table}(
station varchar, station varchar,
...@@ -311,6 +339,13 @@ def createTableQueryPartitionningByDate(table): ...@@ -311,6 +339,13 @@ def createTableQueryPartitionningByDate(table):
)""" )"""
return query return query
'''
name: insertQueryData
description: Create a cql query to insert a row
parameters:
* row: values for the attributes
* table: Table name to insert into
return: query to insert the row
'''
def insertQueryData(row, table): def insertQueryData(row, table):
query = f""" query = f"""
INSERT INTO INSERT INTO
...@@ -358,7 +393,7 @@ def insertQueryData(row, table): ...@@ -358,7 +393,7 @@ def insertQueryData(row, table):
""" """
return query return query
# Not to execute when importing file # Not to execute when importing file
if __name__ == '__main__': if __name__ == '__main__':
cluster = Cluster() cluster = Cluster()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment