...
 
Commits (2)
......@@ -12,7 +12,7 @@ if __name__ == "__main__":
# Specifying info
final_doc = os.path.join(PROJECT_FOLDER, "main.pdf")
latexer = LaTeXer(final_doc=final_doc)
latexer = LaTeXer(lang="fr", final_doc=final_doc)
# Getting interface for data
engine = create_engine(get_db_string())
......
......@@ -5,12 +5,13 @@ from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from src.models import reset_db, Course
from src.scrapper import UTCScrapper
from src.settings import DATA_FOLDER, LANG
from src.utc_specific.scrapper import UTCScrapper
from src.settings import DATA_FOLDER
from src.utils import get_db_string
if __name__ == "__main__":
scrapper = UTCScrapper(results_file=os.path.join(DATA_FOLDER, 'results.html'))
scrapper = UTCScrapper(results_file=os.path.join(DATA_FOLDER, 'results.html'),
js_dump=os.path.join(DATA_FOLDER, "data.json"))
# Cleaning things
reset_db()
......@@ -19,24 +20,33 @@ if __name__ == "__main__":
engine = create_engine(db_string, echo=False)
session_maker = sessionmaker(bind=engine)
courses, fr_descrs, en_descrs = scrapper.get_courses_and_description()
for batch in [courses, fr_descrs, en_descrs]:
session = session_maker()
session.add_all(batch)
session.commit()
# Getting Semesters and Results (online, if the first time)
html_of_result, _ = scrapper.get_html_results_and_student_details()
diplomas, semesters, course_results = scrapper.scrap_results(html_of_result)
for batch in [diplomas, semesters]:
for batch in [diplomas, semesters, course_results]:
session = session_maker()
session.add_all(batch)
session.commit()
# Getting Info about courses online
courses_codes = list(map(lambda cr: cr.course_code, course_results))
courses, courses_descriptions, missing_courses_code = \
scrapper.scrap_courses_description(lang=LANG, courses_to_scrap=courses_codes)
perso_courses_codes = list(map(lambda cr: cr.course_code, course_results))
courses_codes = list(map(lambda c: c.code, courses))
# Missing courses are courses present in the student profile but not online
missing_courses = list(map(lambda cc: Course(code=cc), missing_courses_code))
missing_courses_code = set(perso_courses_codes).difference(courses_codes)
if len(missing_courses_code) != 0:
print("Missing courses online")
for m_c in missing_courses_code:
print(m_c)
for batch in [missing_courses, courses, courses_descriptions, course_results]:
missing_courses = list(map(lambda cc: Course(code=cc), missing_courses_code))
session = session_maker()
session.add_all(batch)
session.add_all(missing_courses)
session.commit()
......@@ -3,7 +3,7 @@ import os
from sqlalchemy import func
from src.models import Semester, CourseResult, Course, CourseDescription, Diploma
from src.settings import LANG, PROJECT_FOLDER
from src.settings import PROJECT_FOLDER
from .utils import get_public_attributes_names, StudentDetails
......@@ -18,10 +18,9 @@ class LaTeXer:
the header, the beginning of the transcript as well as its footer.
"""
def __init__(self, final_doc: str,
def __init__(self, lang: str, final_doc: str,
out_dir=os.path.join(PROJECT_FOLDER, "out"),
templates_dir=os.path.join(PROJECT_FOLDER, "templates"),
lang: str = LANG):
templates_dir=os.path.join(PROJECT_FOLDER, "templates")):
self._final_doc = final_doc
self._out_dir = out_dir
self._main_doc = os.path.join(out_dir, "main.tex")
......@@ -32,7 +31,7 @@ class LaTeXer:
"_semester_template", "_course_template",
"_semester_opts_template", "_diploma_template"]:
file = f"{field[1:]}.tex"
with open(os.path.join(templates_dir, file), "r") as tex_file:
with open(os.path.join(templates_dir, self._lang, file), "r") as tex_file:
setattr(self, field, tex_file.read())
@staticmethod
......
import textwrap
from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, ForeignKey, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property
......@@ -51,15 +53,15 @@ class Semester(Base):
@hybrid_property
def type(self):
return "Fall" if self.is_fall else "Spring"
return "Automne" if self.is_fall else "Printemps"
@hybrid_property
def first_month(self):
return self.start.strftime("%B")
return self.start.strftime("%B").replace("February", "FĂ©vrier").replace("September", "Septembre")
@hybrid_property
def last_month(self):
return self.end.strftime("%B")
return self.end.strftime("%B").replace("January", "Janvier").replace("July", "Juillet")
@hybrid_property
def code(self):
......@@ -88,7 +90,13 @@ class Course(Base):
has_final_exam = Column(Boolean)
def __repr__(self):
return f"{self.code} {self.type} {self.ects} ECTS "
repr = f"{self.code} ({self.type})\n"
repr += f" - worths {self.ects} ECTS\n"
repr += " - taught in Spring\n" if self.taught_in_spring else ""
repr += " - taught in Fall\n" if self.taught_in_fall else ""
repr += " - has a final exam\n" if self.has_final_exam else ""
return repr
class CourseDescription(Base):
......@@ -115,7 +123,29 @@ class CourseDescription(Base):
course = relationship("Course", back_populates="descriptions")
def __repr__(self):
return f"Description of {self.course_code} in {self.lang}"
def format_paragraph(repr):
return "\n".join(textwrap.wrap(repr))
repr = f"Description of {self.course_code} in {self.lang}\n\n"
repr += f" - Title: {format_paragraph(self.title)}\n"
repr += f" - Overview:\n{format_paragraph(self.overview)}\n\n"
repr += f" - Curriculum:\n {format_paragraph(self.curriculum)}\n\n"
repr += f" - Outcomes:\n {format_paragraph(self.outcomes)}\n\n"
repr += f" - Training Objectives:\n {format_paragraph(self.training_objectives)}\n\n"
repr += f" - Pedagogical Objectives:\n {format_paragraph(self.pedagogical_objectives)}\n\n"
repr += f" - Other Objectives:\n {format_paragraph(self.other_objectives)}\n\n"
repr += f" - Bibliography:\n {format_paragraph(self.bibliography)}\n\n"
repr += f" - Recommended level:\n {format_paragraph(self.recommended_level)}\n\n"
repr += f" - Assessment Criteria:\n {format_paragraph(self.assessment_criteria)}\n\n"
repr += f" - Success Criteria:\n {format_paragraph(self.success_criteria)}\n\n"
repr += f" - Misc.:\n {format_paragraph(self.misc)}\n"
repr += "\n"
return repr
class CourseResult(Base):
......
from bs4 import BeautifulSoup
from src.models import Course, CourseDescription
class UTCXMLParser:
    """Parser extracting a course and its description from the JSON dump.

    For UTC students only.

    This base class only holds the parsing logic. Subclasses are language
    specific and must set the marker strings read by :meth:`parse` as
    attributes: ``lang``, ``ects_text``, ``taugh_in_text``,
    ``has_final_exam_text``, ``cs_text``, ``tm_text``, ``tsh_text``,
    ``yes_text``, ``automn_text``, ``spring_text`` and
    ``course_description_headers`` (description field -> header text).
    """

    def parse(self, course_xml_lang):
        """Parse the markup of one course, described in this parser's language.

        The first ``<h4>`` is expected to hold ``"CODE : title"``; every
        other piece of information is looked up in the ``<td>`` cells.

        :param course_xml_lang: markup of the course in the parser's language
        :return: a ``(Course, CourseDescription)`` tuple
        :raises ValueError: if the ``<h4>`` text holds no ``":"`` or if the
            credits cell is not a number
        """
        # Course default values, kept when the matching cell is not found
        type_ = None
        ects = 0
        taught_in_fall = False
        taught_in_spring = False
        has_final_exam = False

        course_descr_content = dict.fromkeys(self.course_description_headers, "")

        soup = BeautifulSoup(course_xml_lang, "lxml")

        code_title = soup.h4.text.strip()
        code, title = code_title.split(":", 1)
        code = code.strip()
        title = title.strip().capitalize()

        course_descr_content["title"] = title
        course_descr_content["course_code"] = code
        course_descr_content["lang"] = self.lang

        # Data is contained in a table: a header cell is followed by the cell
        # holding its value, reached through two ``next_sibling`` hops.
        for entry in soup.find_all("td"):
            header = entry.text

            if self.ects_text in header:
                associated_text = entry.next_sibling.next_sibling.text
                ects = int(associated_text.replace("ECTS", "").strip())
            elif self.taugh_in_text in header:
                associated_text = entry.next_sibling.next_sibling.text
                taught_in_fall = self.automn_text in associated_text
                taught_in_spring = self.spring_text in associated_text
            elif self.has_final_exam_text == header:
                answer = entry.next_sibling.next_sibling
                # ``yes_text in <Tag>`` would test the tag's list of children
                # (exact match only), so compare against the text instead.
                # A missing sibling simply means "no final exam".
                if isinstance(answer, str):
                    answer_text = answer
                else:
                    answer_text = getattr(answer, "text", "")
                has_final_exam = self.yes_text in answer_text
            # Category
            elif self.cs_text in header:
                type_ = "CS"
            elif self.tm_text in header:
                type_ = "TM"
            elif self.tsh_text in header:
                type_ = "TSH"

            # Course Description parsing
            for field, h in self.course_description_headers.items():
                if h in header:
                    try:
                        associated_text = entry.next_sibling.next_sibling.text
                        course_descr_content[field] = associated_text
                    except AttributeError as e:
                        # Best effort: report the cell and keep the default ""
                        print(f"{type(e)} thrown while parsing")
                        print(f" - UV: {code}")
                        print(f" - Entry: {entry}")
                        print(f" - Field: {field}")
                        print(e)

        course = Course(code=code,
                        type=type_,
                        ects=ects,
                        taught_in_fall=taught_in_fall,
                        taught_in_spring=taught_in_spring,
                        has_final_exam=has_final_exam)

        course_descr = CourseDescription(**course_descr_content)

        return course, course_descr
class FrenchParser(UTCXMLParser):
    """Parser for the French version of a course description."""

    def __init__(self):
        self.lang = "fr"

        # Text used to find specific field
        self.ects_text = "Crédits : "
        self.taugh_in_text = "Enseignée en : "
        self.has_final_exam_text = "Examen final : "
        self.cs_text = "Scientifiques (CS-ST)"
        self.tm_text = "Techniques (TM-ST)"
        self.tsh_text = "Technologie et Sciences de l'Homme"
        self.yes_text = "Oui"
        self.automn_text = "Automne"
        self.spring_text = "Printemps"

        # Header of each free-text section, keyed by description field.
        # The accents must be the real characters: the headers are matched
        # verbatim against the text of the cells.
        self.course_description_headers = {
            "overview": "Description brève : ",
            "bibliography": "Ouvrage(s) de référence : ",
            "recommended_level": "Niveau conseillé : ",
            "assessment_criteria": "Conditions d'évaluation : ",
            "success_criteria": "Conditions d'attribution : ",
            "misc": "Particularités: ",
            "training_objectives": "Objectif de formation : ",
            "pedagogical_objectives": "Objectifs pédagogiques spécifiques : ",
            "other_objectives": "Objectifs pédagogiques transverses : ",
            "curriculum": "Programme : ",
            "outcomes": "Résultats : "
        }
class EnglishParsen(UTCXMLParser):
    """Parser for the English version of a course description."""

    def __init__(self):
        # Header of each free-text section, keyed by description field
        section_headers = dict(
            overview="Short description : ",
            bibliography="Reference work : ",
            recommended_level="Required level : ",
            assessment_criteria="Evaluation : ",
            success_criteria="Conditions for providing : ",
            misc="Particularities: ",
            training_objectives="Course objective : ",
            pedagogical_objectives="Specific pedagogical objectives : ",
            other_objectives="Transversal specific objectives : ",
            curriculum="Program : ",
            outcomes="Results : ",
        )
        self.course_description_headers = section_headers

        self.lang = "en"

        # Markers locating the credits, semesters and final exam cells
        self.ects_text = "Credits : "
        self.taugh_in_text = "Semester(s) : "
        self.has_final_exam_text = "Final exam : "

        # NOTE(review): the category and answer markers below are the French
        # words, as in the French parser -- presumably the English markup
        # keeps them untranslated; confirm against the dump.
        self.cs_text = "Scientifiques (CS-ST)"
        self.tm_text = "Techniques (TM-ST)"
        self.tsh_text = "Technologie et Sciences de l'Homme"
        self.yes_text = "Oui"
        self.automn_text = "Automne"
        self.spring_text = "Printemps"
import codecs
import getpass
import json
import os
import pickle
import re
......@@ -12,27 +13,23 @@ from selenium import webdriver
from selenium.common.exceptions import ElementNotVisibleException
from selenium.webdriver.common.keys import Keys
from src.utc_specific.parsers import EnglishParsen, FrenchParser
from src.driver_wrapper import DriverWrapper
from src.models import CourseDescription, Course, CourseResult, Semester, Diploma
from src.settings import DATA_FOLDER, SUPPORTED_LANGS, STUDENT_DETAILS_FILE, CREDENTIALS_FILE
from .utils import build_object, StudentDetails, get_student_details
from src.utils import build_object, StudentDetails, get_student_details
class UTCScrapper:
def __init__(self, results_file: str = os.path.join(DATA_FOLDER, 'results.html'),
student_details_file: str = STUDENT_DETAILS_FILE,
credentials_file: str = CREDENTIALS_FILE,
serialized_courses_indices: str = os.path.join(DATA_FOLDER, "courses_indices.pickle"),
encoding="iso-8859-15"):
credentials_file: str = CREDENTIALS_FILE):
self._results_file = results_file
self._student_details_file = student_details_file
self._credentials_file = credentials_file
self._serialized_courses_indices = serialized_courses_indices
self._encoding = encoding
self._ent_url = "https://webapplis.utc.fr/ent/index.jsf"
self._courses_url = "https://webapplis.utc.fr/uvs/index.xhtml"
# The host is utc.fr; "utc_specific" is only the name of the Python package.
self._ent_url = "https://webapplis.utc.fr/ent/index.jsf"
self._short_name_to_diploma = {
"TC": "Two First Preparatory Years",
......@@ -300,9 +297,9 @@ class UTCScrapper:
return diplomas, semesters, course_results
def scrap_courses_description(self, lang, courses_to_scrap=None):
def get_courses_and_description(self, lang=None, courses_to_scrap=None):
"""
Get the descriptions of the courses online and return associated objects.
Get the descriptions of the courses and return associated objects.
If courses_to_scrap is None, all the courses are scrapped.
......@@ -312,6 +309,62 @@ class UTCScrapper:
courses_descriptions: list of CoursesDescriptions,
missing_courses_code: list of code of missing courses
"""
# Abstract hook overridden by the concrete scrappers. NotImplemented is a
# comparison sentinel, not an exception: calling it raises a TypeError.
raise NotImplementedError()
class JSDumpUTCScrapper(UTCScrapper):
    """Scrapper getting the descriptions of the courses from the JavaScript snippet's dump.

    The dump is a JSON file whose top-level keys are course codes; each value
    holds the markup of the course under the ``"fr"`` and ``"en"`` keys.
    """

    def __init__(self, js_dump=os.path.join(DATA_FOLDER, "data.json"),
                 results_file: str = os.path.join(DATA_FOLDER, 'results.html'),
                 student_details_file: str = STUDENT_DETAILS_FILE,
                 credentials_file: str = CREDENTIALS_FILE):
        """
        :param js_dump: path of the JSON file produced by the JavaScript snippet
        :param results_file: forwarded to ``UTCScrapper``
        :param student_details_file: forwarded to ``UTCScrapper``
        :param credentials_file: forwarded to ``UTCScrapper``
        """
        super(JSDumpUTCScrapper, self).__init__(results_file, student_details_file, credentials_file)
        self._js_dump = js_dump

    def get_courses_and_description(self, lang=None, courses_to_scrap=None):
        """
        Build the courses and their descriptions from the JSON dump.

        Every course of the dump is parsed in both French and English;
        ``lang`` and ``courses_to_scrap`` are accepted for compatibility with
        the parent signature but are not used.

        :return: a tuple of
            courses: list of Course (built from the French markup),
            descrs: list of CourseDescription (French then English, per course),
            an empty list (no course is reported as missing)
        """
        # The dump holds accented text: read it as UTF-8 rather than with the
        # platform's default encoding.
        with open(self._js_dump, encoding="utf-8") as f:
            data = json.load(f)

        # Cleaning the json
        data = {
            uv_code: {lang_key: xml.replace("\t", "") for lang_key, xml in xml_by_lang.items()}
            for uv_code, xml_by_lang in data.items()
        }

        fr_parser = FrenchParser()
        en_parser = EnglishParsen()

        courses = []
        descrs = []

        for xml_by_lang in data.values():
            course, fr_descr = fr_parser.parse(xml_by_lang["fr"])
            # The Course built from the English markup is discarded
            _, en_descr = en_parser.parse(xml_by_lang["en"])

            courses.append(course)
            descrs.append(fr_descr)
            descrs.append(en_descr)

        return courses, descrs, []
class OnlineUTCScrapper(UTCScrapper):
"""The Old scrapper that get info of course online directly """
def __init__(self, results_file: str = os.path.join(DATA_FOLDER, 'results.html'),
             student_details_file: str = STUDENT_DETAILS_FILE,
             credentials_file: str = CREDENTIALS_FILE):
    """
    :param results_file: forwarded to ``UTCScrapper``
    :param student_details_file: forwarded to ``UTCScrapper``
    :param credentials_file: forwarded to ``UTCScrapper``
    """
    super(OnlineUTCScrapper, self).__init__(results_file, student_details_file, credentials_file)

    # Cache file of the course code -> index mapping
    self._serialized_courses_indices = os.path.join(DATA_FOLDER, "courses_indices.pickle")
    self._encoding = "iso-8859-15"
    # The host is utc.fr; "utc_specific" is only the name of the Python package.
    self._courses_url = "https://webapplis.utc.fr/uvs/index.xhtml"
def get_courses_and_description(self, lang=None, courses_to_scrap=None):
driver = DriverWrapper(webdriver.Chrome())
driver.get(self._courses_url)
driver.maximize_window()
......@@ -380,6 +433,41 @@ class UTCScrapper:
return courses, courses_descriptions, missing_courses_code
def _get_courses_indices(self, driver):
    """
    Returns the indices for locating course on the webpage.

    The indices are extracted with the driver only when the pickle dump does
    not exist yet; the result is always read back from that dump.

    :param driver: the webdriver to use
    :return: a dict like {"UV00" : 42, ... }
    """
    if not os.path.exists(self._serialized_courses_indices):
        print("> Extracting courses indices")
        courses_indices = dict()

        def xpath_of_course_code(number):
            # The course of index ``number`` sits in row ``number + 2``
            return f'//*[@id="mainForm"]/div/table/tbody/tr[{number+2}]/th[1]'

        index = 0
        while True:
            elems = driver.find_elements_by_xpath(xpath_of_course_code(index))
            if not elems:
                # First row without a course code: the listing is over
                break
            courses_indices[elems[0].text] = index
            index += 1
        print(f"Done extracting {len(courses_indices)} courses indices !")

        # Context managers make sure the cache is flushed and closed
        with open(self._serialized_courses_indices, "wb") as dump_file:
            pickle.dump(courses_indices, dump_file)

    # NOTE: unpickling is only safe because this file is a cache written by
    # this very method; never point it at untrusted data.
    with open(self._serialized_courses_indices, "rb") as dump_file:
        return pickle.load(dump_file)
def _parse_course(self, driver, lang):
"""
Return the Course and CourseDescription of a specific course
......@@ -478,38 +566,3 @@ class UTCScrapper:
outcomes=outcomes)
return course, course_description
def _get_courses_indices(self, driver):
    """
    Returns the indices for locating course on the webpage.

    The indices are extracted with the driver only when the pickle dump does
    not exist yet; the result is always read back from that dump.

    :param driver: the webdriver to use
    :return: a dict like {"UV00" : 42, ... }
    """
    if not os.path.exists(self._serialized_courses_indices):
        print("> Extracting courses indices")
        courses_indices = dict()

        def xpath_of_course_code(number):
            # The course of index ``number`` sits in row ``number + 2``
            return f'//*[@id="mainForm"]/div/table/tbody/tr[{number+2}]/th[1]'

        index = 0
        while True:
            elems = driver.find_elements_by_xpath(xpath_of_course_code(index))
            if not elems:
                # First row without a course code: the listing is over
                break
            courses_indices[elems[0].text] = index
            index += 1
        print(f"Done extracting {len(courses_indices)} courses indices !")

        # Context managers make sure the cache is flushed and closed
        with open(self._serialized_courses_indices, "wb") as dump_file:
            pickle.dump(courses_indices, dump_file)

    # NOTE: unpickling is only safe because this file is a cache written by
    # this very method; never point it at untrusted data.
    with open(self._serialized_courses_indices, "rb") as dump_file:
        return pickle.load(dump_file)
\begin{document}
% \maketitle
\begin{center}
\begin{Huge}\textsc{Relevé de résultats}\end{Huge}
\vspace{3em}
\begin{Large}
Résultats pour le diplôme \diploma
\end{Large}
\end{center}
\begin{center}
%
\begin{tabular}{llcll}
\hline
\hline
\textbf{Prénom} & \name & & \textbf{Université} & \university \\
\textbf{Nom} & \surname & & \textbf{Diplôme} & \diplomashort \\
\textbf{Lieu de naissance} & \placebirth & & \textbf{Début d'études} & \enrolledsince \\
\textbf{INE} & \studentnumber & & \textbf{Fin d'études} & \studyend \\
\textbf{Ville} & \city & & \textbf{Majeure} & \major \\
\textbf{Pays} & \country & & \textbf{Mineur} & \minor \\
\hline
\hline
\end{tabular}
\end{center}
\begin{footnotesize}
L'UTC a adopté le système de notation ECTS, entièrement compatible avec la politique pédagogique
basée sur l'attribution de crédits via des unités de valeur (UV).\\
Pour chacune de ces unités, les performances de l'étudiant sont attestées par son succès ou son
échec selon l'échelle ci-dessous :
\begin{table}[!ht]
\centering
\begin{tabular}{|c|c|p{7cm}|}
\hline
\textbf{} & \textbf{Note} & \textbf{Signification}\\ \hline\hline
&A & \textit{Excellent} : des résultats bien au-dessus de la moyenne avec des erreurs minimes\\
\mr{5}{*}{\textsc{Réussite}}&B& \textit{Très bon} : au-dessus de la moyenne avec quelques erreurs\\
&C& \textit{Bon} : généralement bon mais un nombre notable d'erreurs\\
&D& \textit{Satisfaisant} : passable \\
&E& \textit{Suffisant} : les résultats atteignent le minimum\\
\hline
\mr{2}{*}{\textsc{Échec}} &FX & \textit{Échec Limite} : plus de travail aurait permis de valider le cours\\
& F & \textit{Échec} : un travail considérable aurait été nécessaire pour valider le cours\\
\hline
\end{tabular}
\end{table}
ECTS: \textit{European Credit Transfer System} ; 1 crédit ECTS représente environ 25 heures de travail.
Catégorie de cours: CS : Scientifique, TM : Technique, TSH : Humanités, SP : Stage.
\end{footnotesize}
\[ \star \quad \star \quad \star \]
\newpage
\begin{center}
Le président de l'\university, soussigné, certifie que \name\ \surname\ a obtenu un total de \totalcredits\ crédits
ECTS répartis comme suit.
% use packages: array
\begin{longtable}{|c|c|p{10cm}|c|c|}
\textbf{Code} & \textbf{Catégorie} & \textbf{Description du cours} & \textbf{ECTS} & \textbf{Note} \\ \hline\hline
\endhead
%
%%%% COURSE_CODE
\mr{3}{*}{COURSE_CODE} &
\mr{3}{*}{COURSE_TYPE} &
\textbf{Titre} : \textsc{COURSE_TITLE} &
\mr{3}{*}{COURSE_ECTS} &
\mr{3}{*}{GRADE_OBTAINED} \\ \cline{3-3}
&&\textbf{Aperçu :} COURSE_OVERVIEW&&\\
\cline{3-3}
&&\textbf{Programme :} COURSE_PROGRAM&&\\
\cline{1-5}
\ No newline at end of file
%%%% Période de diplôme
\multicolumn{5}{|l|}{} \\
\multicolumn{5}{|l|}{\large\textsc{DIPLOMA_NAME (DIPLOMA_PERIOD)}} \\\multicolumn{5}{|l|}{} \\
\hline
\ No newline at end of file
\end{longtable}
\end{center}
\[ \star \quad \star \quad \star \]
\vfill
\newcommand{\rulelen}{3.8cm}
\hspace{-1em}\rule{\rulelen}{1pt}\hfill \rule{\rulelen}{1pt}\hfill
\rule{\rulelen}{1pt}\\
Date \hfill Signature \hfill Sceau du Département
\end{document}
\documentclass[a4paper,10pt,final]{scrartcl}
\usepackage[francais]{babel}
\usepackage[utf8]{inputenc}
\usepackage[left]{eurosym}
\RequirePackage{graphicx}
\usepackage{multirow}
\usepackage{longtable}
\usepackage{lmodern} % Pour avoir de bonnes polices en pdf
\usepackage{graphicx} % Indispensable pour les figures
\usepackage{epstopdf} % Utile pour les figures, résout une erreur
\usepackage{amsmath} % Environnement pour les maths, permet de mettre du texte dans les équations
\usepackage{mathtools, bm} % Typographie pour les ensembles communs
\usepackage{amssymb, bm} % Typographie pour les ensembles communs
\usepackage{amsmath}
\setlength{\textwidth}{16cm}
\setlength{\textheight}{23cm}
\setlength{\oddsidemargin}{0cm}
\setlength\parindent{0pt}%Festlegen des Absatzeinzuges
%opening
\title{\textbf{TRANSCRIPT OF RECORDS}\\[0.5em]
Record of study and examination results in the \diploma
}
% \subtitle{foo}
\author{}
\newcounter{firstyear}
\setcounter{firstyear}{2014}
%%%%%%%%%%%%%% End of Configuration -- Naja, fast...
\newcommand{\mc}[3]{\multicolumn{#1}{#2}{#3}}
\newcommand{\mr}[3]{\multirow{#1}{#2}{#3}}
%%%%%%%% Makros von Roland
\def\instead#1#2{\hbox to 0pt{#2\hss}\phantom{#1}}
\newbox\insteadboxi
\newbox\insteadboxii
\newbox\insteadboxiii
\newbox\insteadboxiv
\newbox\insteadboxv
%%%%%%%%%
%%%%%%% Vars
\multicolumn{5}{|l|}{\textbf{Observation : } SEMESTER_OBSERVATION }\\
\hline
\ No newline at end of file
%%%%% Semestre SEMESTER_DATE
\multicolumn{5}{|l|}{} \\
\multicolumn{5}{|l|}{\textsc{SEMESTER_LEVEL -- SEMESTER_CODE: SEMESTER_TYPE SEMESTER_YEAR (SEMESTER_PERIOD)}} \\
\multicolumn{5}{|l|}{} \\
\hline