insertLocation.py 3.65 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
import csv
import re

def getData(container, value, type):
    result = container[value]
    if (result == "M" or result == "T") :
        result = None

    if (type == "string"):
        return result
    elif (type == "float") :
        if (result != None):
            result = float(result)
        return result
      
    elif (type == "date") :
        if (result != None) :
            dateparser = re.compile("(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)")
            match = dateparser.match(container[value])
            if not match :
                return "continue"
            data = match.groupdict()
            result = (
                int(data["year"]),
                int(data["month"]),
                int(data["day"]),
                int(data["hour"]),
                int(data["minute"]),
            )
            return result
    else :
        return None

def loadataLocation(filename): 
    with open(filename) as f:
        for r in csv.DictReader(f):
            timestamp = getData(r, "valid", "date")
            if (timestamp == "continue" or timestamp == None):
                continue
            data = {}
            data["valid"] = timestamp
            data["station"] = getData(r, "station", "string")

            data["lon"] = getData(r, "lon", "float")
            data["lat"] = getData(r, "lat", "float")
            data["tmpc"] = getData(r, "tmpc", "float")
            data["dwpc"] = getData(r, "dwpc", "float")
            data["relh"] = getData(r, "relh", "float")
            data["sknt"] = getData(r, "sknt", "float")
            data["p01m"] = getData(r, "p01m", "float")
            data["vsby"] = getData(r, "vsby", "float")
            data["feel"] = getData(r, "feel", "float")

            yield data


import json

from cassandra.cluster import Cluster
#Connexion au cluster
cluster = Cluster(['localhost'])   
session = cluster.connect('dbermond_projet')

#Création de la table 
query = '''
    DROP TABLE IF EXISTS weatherByLocation ;
    '''

session.execute(query)

query = '''
    CREATE TABLE weatherByLocation(
    station text,

    valid_year varint,
    valid_month varint,
    valid_day varint,
    valid_hour varint,
    valid_minute varint,

    lon float,
    lat float,
    tmpc float,
    dwpc float,
    relh float,
    sknt float,
    p01m float,
    vsby float,

    feel float,

    PRIMARY KEY ((station), valid_year, valid_month, valid_day,  valid_hour, valid_minute)
    );
'''

session.execute(query)

#Fonction de chargement des données dans la table
def getWeatherByLocation(csvfilename, session):
    data = loadataLocation(csvfilename)
    for r in data:
        t = (
            r["station"],

            r["valid"][0],
            r["valid"][1],
            r["valid"][2],
            r["valid"][3],
            r["valid"][4],

            r["lon"],
            r["lat"],
            r["tmpc"],
            r["dwpc"],
            r["relh"],
            r["sknt"],
            r["p01m"],
            r["vsby"],

            r["feel"],
            )
        query = """
        INSERT INTO weatherByLocation(

                station,

                valid_year,
                valid_month,
                valid_day,
                valid_hour,
                valid_minute,

                lon,
                lat,
                tmpc,
                dwpc,
                relh,
                sknt,
                p01m,
                vsby,
                
                feel)
        VALUES (%s, %s, %s ,%s ,%s, %s, %s, %s ,%s ,%s, %s, %s, %s, %s, %s)
        """
        session.execute(query, t)


#Chargement des données

getWeatherByLocation("/home/dbermond/Data/asos-2004>2013.txt", session)