Commit 47918e48 authored by Romain Creuzenet's avatar Romain Creuzenet

Read CSV

parents
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Projet.iml" filepath="$PROJECT_DIR$/.idea/Projet.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="dec891dc-2fad-4291-af33-64d4fd64029d" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/create_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="794">
<caret line="88" column="17" selection-start-line="88" selection-start-column="17" selection-end-line="88" selection-end-column="17" />
<folding>
<element signature="e#0#25#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/espagne_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret column="12" selection-start-column="8" selection-end-column="13" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/parameters.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="57">
<caret line="3" column="79" lean-forward="true" selection-start-line="3" selection-start-column="79" selection-end-line="3" selection-end-column="79" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/parameters.py" />
<option value="$PROJECT_DIR$/create_table.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="67" />
<option name="y" value="25" />
<option name="width" value="927" />
<option name="height" value="1055" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="Projet" type="b2602c69:ProjectViewProjectNode" />
<item name="Projet" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="dec891dc-2fad-4291-af33-64d4fd64029d" name="Default Changelist" comment="" />
<created>1560030768033</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1560030768033</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="67" y="25" width="1853" height="1055" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="Favorites" side_tool="true" />
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24972677" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info anchor="bottom" id="Version Control" />
<window_info anchor="bottom" id="Python Console" />
<window_info anchor="bottom" id="Terminal" />
<window_info anchor="bottom" id="Event Log" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
</layout>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/parameters.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="57">
<caret line="3" column="79" lean-forward="true" selection-start-line="3" selection-start-column="79" selection-end-line="3" selection-end-column="79" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/espagne_2001_2010.csv">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret column="12" selection-start-column="8" selection-end-column="13" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/create_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="794">
<caret line="88" column="17" selection-start-line="88" selection-start-column="17" selection-end-line="88" selection-end-column="17" />
<folding>
<element signature="e#0#25#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
from parameters import *
from cassandra.cluster import Cluster
from collections import OrderedDict
import csv
import re
# Connect to the Cassandra node on localhost and open a session bound to the
# keyspace named by `key_space` (provided by `from parameters import *`).
# This runs as soon as the module is imported.
cluster = Cluster(['localhost'])
session = cluster.connect(key_space)
# Ordered mapping: column name -> Python type used to convert the CSV text.
# Order matters: create_table() uses the first five entries as the primary key.
# 'year' .. 'minute' are not read directly from the CSV; read_csv() extracts
# them from the row's "valid" field with a regular expression.
columns = OrderedDict([
    ('station', str),
    ('year', int),
    ('month', int),
    ('day', int),
    ('hour', int),
    ('minute', int),
    ('lon', float),
    ('lat', float),
    ('tmpf', float),
    ('dwpf', float),
    ('relh', float),
    ('drct', float),
    ('sknt', float),
    # 'alti' left out: every value is null
    ('mslp', float),
    ('vsby', float),
    ('gust', float),
    ('skyc1', str),
    ('skyc2', str),
    ('skyc3', str),
    ('skyc4', str),
    ('skyl1', float),
    ('skyl2', float),
    ('skyl3', float),
    ('skyl4', float),
    ('wxcodes', str),
    # 'ice_accretion_1hr' left out: every value is null
    # 'ice_accretion_3hr' left out: every value is null
    # 'ice_accretion_6hr' left out: every value is null
    # 'peak_wind_gust' left out: every value is null
    # 'peak_wind_drct' left out: every value is null
    # 'peak_wind_time' left out: every value is null
    ('feel', float),
    ('metar', str)
])
def create_table():
    """Drop the table named by ``table`` if it exists, then create it again.

    One CQL column is declared per entry of the module-level ``columns``
    mapping, and the first five column names form the primary key.
    """
    session.execute("DROP Table IF EXISTS {};".format(table))

    # CQL type emitted for each Python type that appears in ``columns``.
    cql_types = {str: 'text', int: 'varint', float: 'float'}

    column_defs = []
    for name, py_type in columns.items():
        column_defs.append("{} {}".format(name, cql_types[py_type]))

    # NOTE(review): 'minute' is the sixth entry of ``columns`` and is therefore
    # not part of the key; rows differing only by minute would share a key.
    # Confirm this is intended.
    primary_key = list(columns.keys())[:5]

    template = (
        "\n"
        "CREATE TABLE {table}(\n"
        "{columns},\n"
        "PRIMARY KEY ({keys})\n"
        ");\n"
    )
    query = template.format(
        table=table,
        columns=",\n\t".join(column_defs),
        keys=", ".join(primary_key)
    )
    session.execute(query)
def read_csv():
    """Yield one converted record per usable row of the CSV file ``file_name``.

    Each row's "valid" field is matched against a ``YYYY-MM-DD HH:MM`` style
    pattern; rows that do not match are skipped. The captured year, month,
    day, hour and minute are merged into the row, then every column listed in
    the module-level ``columns`` mapping is converted with its associated type.

    Columns whose cell is the literal text 'null', that are absent from the
    file, or that are missing because the row is shorter than the header are
    left out of the yielded dict.

    Yields:
        dict: column name -> converted value, in ``columns`` order.

    Raises:
        KeyError: if the file has no "valid" column.
        ValueError: if a non-null cell cannot be converted to its column type.
    """
    date_parser = re.compile(
        r"(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+) (?P<hour>\d+):(?P<minute>\d+)"
    )
    # newline='' hands raw line endings to the csv module, as its documentation
    # requires; otherwise newlines inside quoted fields can be mangled.
    with open(file_name, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            match = date_parser.match(row["valid"])
            if not match:
                continue
            row.update(match.groupdict())
            record = {}
            for key, convert in columns.items():
                raw = row.get(key, 'null')
                # DictReader fills the missing cells of a short row with None;
                # treat those like 'null' instead of converting None.
                if raw is None or raw == 'null':
                    continue
                record[key] = convert(raw)
            yield record
def insert_table():
    """Insert every record produced by read_csv() into the table ``table``.

    Each record becomes one INSERT statement listing only the columns present
    in that record. Values are passed to the driver as bound parameters
    (``%s`` placeholders) rather than being spliced into the query text, so
    strings containing quote characters are encoded correctly.

    Prints the number of inserted rows once all records have been processed.
    """
    inserted = 0
    for record in read_csv():
        keys = list(record.keys())
        values = tuple(record[key] for key in keys)
        # Column names come from the fixed ``columns`` mapping, so formatting
        # them into the statement is safe; only the values need binding.
        query = "INSERT INTO {table} ({keys}) VALUES ({placeholders});".format(
            table=table,
            keys=", ".join(keys),
            placeholders=", ".join(["%s"] * len(values))
        )
        session.execute(query, values)
        inserted += 1
    print("{} lignes inserted".format(inserted))
# When run as a script: drop and recreate the table, then load the CSV into it.
if __name__ == "__main__":
    create_table()
    insert_table()
This source diff could not be displayed because it is too large. You can view the blob instead.
# === Parameters ===
# Settings pulled in with `from parameters import *` by the loader script.
# Keyspace the Cassandra session connects to.
key_space = "nf26"
# Name of the table that is dropped, recreated and filled.
table = "Spain"
# CSV file to load.
# NOTE(review): absolute, machine-specific path; adjust before running on another machine.
file_name = "/home/romain/Documents/UTC/GI04/NF26/Projet/espagne_2001_2010.csv"
File added
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment