create_table.py 2.21 KB
Newer Older
Romain Creuzenet's avatar
Romain Creuzenet committed
1
from parameters import TABLE, DIR_DATA, SESSION, COLUMNS, TABLES
Romain Creuzenet's avatar
Romain Creuzenet committed
2 3
import csv
import re
Romain Creuzenet's avatar
Romain Creuzenet committed
4
import os
Romain Creuzenet's avatar
Romain Creuzenet committed
5 6 7 8


def create_table():
    """ Create the table"""
Romain Creuzenet's avatar
Romain Creuzenet committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
    for table, primary_key in TABLES.items():
        SESSION.execute("DROP Table IF EXISTS {};".format(table))
        translate_sql = {
            str: 'text',
            int: 'varint',
            float: 'float'
        }
        query = """
        CREATE TABLE {table}(
            time tuple<int,int,int, int, int>,
            {columns},
            PRIMARY KEY ({primary_key})
        );
        """.format(
            table=table,
            columns=",\n\t".join(["{} {}".format(key, translate_sql[value]) for key, value in COLUMNS.items()]),
            primary_key=primary_key
        )
        SESSION.execute(query)
Romain Creuzenet's avatar
Romain Creuzenet committed
28 29 30 31 32 33 34


def read_csv():
    """Read CSV file"""
    date_parser = re.compile(
        r"(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)"
    )
Romain Creuzenet's avatar
Romain Creuzenet committed
35 36 37 38 39 40 41 42 43 44 45
    for file_name in os.listdir(DIR_DATA):
        file_path = os.path.join(DIR_DATA, file_name)
        with open(file_path) as file:
            for row in csv.DictReader(file):
                match = date_parser.match(row["valid"])
                if not match:
                    continue
                m = match.groupdict()
                r = {key: f(row[key]) for key, f in COLUMNS.items() if row.get(key, 'null') != 'null'}
                r["time"] = (int(m['year']), int(m['month']), int(m['day']), int(m['hour']), int(m['minute']))
                yield r
Romain Creuzenet's avatar
Romain Creuzenet committed
46 47 48 49 50 51 52 53 54 55 56


def insert_table():
    """Insert in table all content csv file"""
    i = 0
    for d in read_csv():
        keys = []
        values = []
        for key, value in d.items():
            keys.append(key)
            values.append(value)
Romain Creuzenet's avatar
Romain Creuzenet committed
57 58 59 60 61 62 63 64 65 66 67

        keys = ", ".join(keys)
        values = ", ".join([v.__repr__() for v in values])

        for table in TABLES.keys():
            query = "INSERT INTO {table} ({keys}) VALUES ({values});".format(
                table=table,
                keys=keys,
                values=values
            )
            SESSION.execute(query)
Romain Creuzenet's avatar
Romain Creuzenet committed
68
        i += 1
Romain Creuzenet's avatar
Romain Creuzenet committed
69
        print("Ligne {}".format(i), end="\r")
Romain Creuzenet's avatar
Romain Creuzenet committed
70 71 72 73 74 75
    print("{} lignes inserted".format(i))


if __name__ == "__main__":
    create_table()
    insert_table()