extract_universities.py 1.92 KB
Newer Older
Florent Chehab's avatar
Florent Chehab committed
1
2
3
4
5
6
7
#!/usr/bin/env python3

"""
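Script to geocode the universities listed in destinations.csv.

Each university name is looked up with Nominatim, the coordinates found are
reverse-geocoded to a country code, and one row per university
(university, city, country, lat, lon) is written to
destinations_extracted.csv. Queries that return no location are collected
and printed at the end.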

IT HAS TO BE RUN INSIDE ./manage.py shell
"""
Florent Chehab's avatar
linting  
Florent Chehab committed
8
import csv
Florent Chehab's avatar
Florent Chehab committed
9
10
11
import os
import time
from geopy.geocoders import Nominatim
Florent Chehab's avatar
Florent Chehab committed
12
import reverse_geocoder as rg
Florent Chehab's avatar
Florent Chehab committed
13
14
15
16

# Resolve the CSV locations relative to this script rather than the current
# working directory: the files live in the ``assets`` directory that sits
# next to the directory containing this script.
_script_dir = os.path.dirname(os.path.realpath(__file__))
_assets_dir = os.path.abspath(os.path.join(_script_dir, os.pardir, 'assets'))

# Input file: one destination per row (read by the geocoding loop below).
destinations_path = os.path.join(_assets_dir, 'destinations.csv')

# Output file: the geocoded rows are written here.
destinations_extracted_path = os.path.join(
    _assets_dir, 'destinations_extracted.csv')

# Abort early when the input CSV is absent; the expected path is printed
# first so it is easy to see where the file should be.
if not os.path.isfile(destinations_path):
    print(destinations_path)
    raise Exception("Missing file containing destinations data")

# newline='' is what the csv module asks for on both readers and writers:
# it lets the module handle line endings itself (quoted fields containing
# newlines on input, no doubled line endings on output).
with open(destinations_path, 'rt', newline='') as source, \
        open(destinations_extracted_path, 'w', newline='') as output:
    print("ini")
    reader = csv.reader(source)
    writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC)
    # NOTE(review): recent geopy releases appear to require an explicit
    # ``user_agent`` argument here -- confirm against the installed version.
    geolocator = Nominatim()

    # Queries for which the geocoder returned no location.
    failed = []

    for index, row in enumerate(reader):
        if index == 0:
            # The first input row is a header: it is not geocoded and is
            # replaced by the header of the output file.
            writer.writerow(['university', 'city', 'country', 'lat', 'lon'])
            continue

        # Only the third column (written out as 'university') is sent to
        # the geocoder.
        query = row[2]

        # Retry until the geocoder answers. ``Exception`` (rather than a
        # bare ``except``) keeps the retry-forever behaviour for service
        # errors while still letting Ctrl-C / SystemExit stop the script.
        while True:
            try:
                location = geolocator.geocode(query)
                break
            except Exception:
                print("error during query, retrying")
                time.sleep(0.5)

        if location is None:
            failed.append(query)
            continue

        # Reverse-geocode the coordinates and keep the 'cc' field of the
        # first result as the country column.
        coord = (location.latitude, location.longitude)
        res = rg.search(coord, verbose=False)
        line = [
            row[2],
            row[1],
            res[0]['cc'],
            location.latitude,
            location.longitude,
        ]
        print(line)
        writer.writerow(line)

    print(failed)