extract_universities.py 2.04 KB
Newer Older
Florent Chehab's avatar
Florent Chehab committed
1 2 3 4 5 6 7
#!/usr/bin/env python3

"""
Script to geocode the universities listed in assets/destinations.csv and
write the result to assets/destinations_extracted.csv

IT HAS TO BE RUN INSIDE ./manage.py shell
"""
Florent Chehab's avatar
Florent Chehab committed
8
import csv
Florent Chehab's avatar
Florent Chehab committed
9 10 11
import os
import time
from geopy.geocoders import Nominatim
Florent Chehab's avatar
Florent Chehab committed
12
import reverse_geocoder as rg
Florent Chehab's avatar
Florent Chehab committed
13

14
# Both CSV files live in the "assets" directory located one level above the
# directory that contains this script.
_assets_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "assets"
)

# Input: the raw list of destinations to geocode.
destinations_path = os.path.join(_assets_dir, "destinations.csv")

# Output: the geocoded rows produced by this script.
destinations_extracted_path = os.path.join(
    _assets_dir, "destinations_extracted.csv"
)

# Fail early, and name the path that was looked up, when the input is absent.
# FileNotFoundError is still an Exception, so existing handlers keep working.
if not os.path.isfile(destinations_path):
    print(destinations_path)
    raise FileNotFoundError(
        "Missing file containing destinations data: " + destinations_path
    )

26 27
def _geocode_with_retry(geolocator, query, delay=0.5):
    """Geocode ``query``, retrying for as long as the call raises.

    Args:
        geolocator: object exposing a ``geocode(query)`` method.
        query: the text to look up.
        delay: seconds to wait between two attempts.

    Returns:
        Whatever ``geolocator.geocode`` returns; the caller treats ``None``
        as a failed lookup.
    """
    while True:
        try:
            return geolocator.geocode(query)
        except Exception as exc:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop this unbounded loop.
            print("error during query, retrying", repr(exc))
            time.sleep(delay)


# The csv module expects the files it is given to be opened with newline="".
with open(destinations_path, "rt", newline="") as source:
    with open(destinations_extracted_path, "w", newline="") as output:
        print("ini")
        reader = csv.reader(source)
        spamwriter = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC)
        # NOTE(review): recent geopy releases may require an explicit
        # ``user_agent`` argument here - confirm against the installed version.
        geolocator = Nominatim()

        # Queries for which the geocoder returned no location.
        failed = []

        # The first input row is a header: drop it and write our own instead.
        # Nothing is written when the input file is completely empty.
        if next(reader, None) is not None:
            spamwriter.writerow(["university", "city", "country", "lat", "lon"])

        for row in reader:
            # Columns 1 and 2 are read below; skip rows that lack them
            # (e.g. blank lines) instead of crashing with an IndexError.
            if len(row) < 3:
                if row:
                    print("skipping malformed row", row)
                continue

            query = row[2]  # + ', ' + row[1] + ', ' + row[0]
            location = _geocode_with_retry(geolocator, query)

            if location is None:
                failed.append(query)
                continue

            # Reverse-geocode the coordinates to fill the "country" column
            # with the "cc" field of the first match.
            coord = (location.latitude, location.longitude)
            res = rg.search(coord, verbose=False)
            line = [
                row[2],
                row[1],
                res[0]["cc"],
                location.latitude,
                location.longitude,
            ]
            print(line)
            spamwriter.writerow(line)

        print(failed)