script_cassandra.py 6.03 KB
Newer Older
Thibaud DE FILIPPIS's avatar
Thibaud DE FILIPPIS committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
from cassandra.cluster import Cluster
import cassandra.cluster
import csv
import re
from datetime import datetime
import load

from cassandra.cluster import Cluster
from load_data_projet import load_data

     
         
def limiteur2(data, limit):
    for i, d in enumerate(data):
        if i==limit:
            return None
        yield d


cluster = Cluster(['localhost'])
session = cluster.connect('tdefilip_projet')


#==============================================================================
# creation_table_question_1 = "CREATE TABLE question_1_v2( \
#                 station_id text,\
#                 latitude float,\
#                 longitude float,\
#                 year varint,\
#                 month varint,\
#                 day varint,\
#                 hour varint,\
#                 minute varint,\
#                 tmpf float,\
#                 dwpf float,\
#                 relh float,\
#                 drct float,\
#                 sknt float,\
#                 alti float,\
#                 vsby float,\
#                 gust float,\
#                 skyc1 text,\
#                 skyc2 text,\
#                 skyc3 text,\
#                 skyc4 text,\
#                 skyl1 float,\
#                 skyl2 float,\
#                 skyl3 float,\
#                 skyl4 float,\
#                 wxcodes text,\
#                 feel float,\
#                 metar text,\
#                 PRIMARY KEY ((station_id, year), month, hour, minute));"
# 
# session.execute(creation_table_question_1)
#==============================================================================
#==============================================================================
# creation_table_question_2 = "CREATE TABLE question_2( \
#                 station_id text,\
#                 latitude float,\
#                 longitude float,\
#                 year varint,\
#                 month varint,\
#                 day varint,\
#                 hour varint,\
#                 minute varint,\
#                 tmpf float,\
#                 dwpf float,\
#                 relh float,\
#                 drct float,\
#                 sknt float,\
#                 alti float,\
#                 vsby float,\
#                 gust float,\
#                 skyc1 text,\
#                 skyc2 text,\
#                 skyc3 text,\
#                 skyc4 text,\
#                 skyl1 float,\
#                 skyl2 float,\
#                 skyl3 float,\
#                 skyl4 float,\
#                 wxcodes text,\
#                 feel float,\
#                 metar text,\
#                 PRIMARY KEY ((year, month), day, hour, minute, station_id));"
# 
# session.execute(creation_table_question_2)
#==============================================================================
#==============================================================================
# def insert_1(csv, session):
#     gen = limiteur2(load.loadata(csv),  500)
#     # data = load_ata(csvfilename)
#     i = 0
#     for r in gen:
# 
#         data = (r["station"],r['lat'],r['lon'],r["timestamp"][0],r["timestamp"][1],
#                 r["timestamp"][2],r["timestamp"][3],r["timestamp"][4],r["tmpf"],
#                 r["dwpf"],r["relh"],r["drct"],r["sknt"],r["alti"],r["vsby"],r["gust"],
#                 r["skyc1"],r["skyc2"],r["skyc3"],r["skyc4"],r["skyl1"],r["skyl2"],
#                 r["skyl3"],r["skyl4"],r["wxcodes"],r["feel"],r["metar"])
#         req = """
#         INSERT INTO question_1_extract(station_id,latitude,longitude,year,month,day,
#         hour, minute,tmpf,dwpf,relh,drct,sknt,alti,vsby,gust,skyc1,skyc2,skyc3,
#         skyc4,skyl1,skyl2,skyl3,skyl4,wxcodes,feel,metar )
#         VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
#         %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
#         """
#         session.execute(req, data)
# 
# insert_1('asos.csv', session)
#==============================================================================

#==============================================================================
# def insert_1(csv, session):
#     gen = load.loadata(csv)
#     # data = load_ata(csvfilename)
#     for r in gen:
#         data = (r["station"],r['lat'],r['lon'],r["timestamp"][0],r["timestamp"][1],
#                 r["timestamp"][2],r["timestamp"][3],r["timestamp"][4],r["tmpf"],
#                 r["dwpf"],r["relh"],r["drct"],r["sknt"],r["alti"],r["vsby"],r["gust"],
#                 r["skyc1"],r["skyc2"],r["skyc3"],r["skyc4"],r["skyl1"],r["skyl2"],
#                 r["skyl3"],r["skyl4"],r["wxcodes"],r["feel"],r["metar"])
#         req = """
#         INSERT INTO question_1(station_id,latitude,longitude,year,month,day,
#         hour, minute,tmpf,dwpf,relh,drct,sknt,alti,vsby,gust,skyc1,skyc2,skyc3,
#         skyc4,skyl1,skyl2,skyl3,skyl4,wxcodes,feel,metar )
#         VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
#         %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
#         """
#         session.execute(req, data)
# 
# insert_1('asos.csv', session)
#==============================================================================

def insert_1(csv, session):
    gen = load.loadata(csv)
    # data = load_ata(csvfilename)
    for r in gen:
        data = (r["station"],r['lat'],r['lon'],r["timestamp"][0],r["timestamp"][1],
                r["timestamp"][2],r["timestamp"][3],r["timestamp"][4],r["tmpf"],
                r["dwpf"],r["relh"],r["drct"],r["sknt"],r["alti"],r["vsby"],r["gust"],
                r["skyc1"],r["skyc2"],r["skyc3"],r["skyc4"],r["skyl1"],r["skyl2"],
                r["skyl3"],r["skyl4"],r["wxcodes"],r["feel"],r["metar"])
        req = """
        INSERT INTO question_1_v2(station_id,latitude,longitude,year,month,day,
        hour, minute,tmpf,dwpf,relh,drct,sknt,alti,vsby,gust,skyc1,skyc2,skyc3,
        skyc4,skyl1,skyl2,skyl3,skyl4,wxcodes,feel,metar )
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        session.execute(req, data)

insert_1('asos.csv', session)