Skip to content

Commit

Permalink
Fixes: language and status
Browse files Browse the repository at this point in the history
  • Loading branch information
ykerus committed Jan 19, 2024
1 parent 5c503c2 commit 2092afd
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 20 deletions.
1 change: 0 additions & 1 deletion src/crea_scraper/course.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,4 @@ class Course(CourseClass):
cursusnummer: str # 12345
docent: str # e.g. Yke Rusticus
taal: str # e.g. 🇳🇱
cursus_type: str # e.g. Fysiek
status: str # e.g. open
27 changes: 8 additions & 19 deletions src/crea_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,13 @@ def _get_raw_table_data(table_html, incl_empty_values=True):
rows = table_html.find_all("tr")
for row in rows:
cols = row.find_all("td")
cols = [e.text.strip() for e in cols]
table_data.append([e for e in cols if e or incl_empty_values])
cols_extracted = []
for col in cols:
if col.find("img"):
cols_extracted.append(", ".join([img["title"] for img in col.find_all("img")]))
else:
cols_extracted.append(col.text.strip())
table_data.append([e for e in cols_extracted if e or incl_empty_values])
return table_data


Expand Down Expand Up @@ -171,17 +176,6 @@ def _separate_time_from_day(table_dict):
return table_dict


def _rename_table_columns(table_dict):
try:
table_dict["cursus_type"] = table_dict["cursus type"]
del table_dict["cursus type"]
except KeyError:
print(f"Key 'cursus type' not found in {table_dict.keys()}")
table_dict["cursus_type"] = ""

return table_dict


def _extract_data_from_table(table_html):
table_data = _get_raw_table_data(table_html)
table_dict = _get_dict_from_raw_table_data(table_data)
Expand All @@ -190,11 +184,7 @@ def _extract_data_from_table(table_html):


# Derive a course's status from its registration-link element.
# Only the `.text` attribute of `register_link_html` is read.
# NOTE(review): this page renders a diff without +/- markers, so two revisions
# of the body appear interleaved below. The enclosing hunk header
# (-190,11 +184,7) is consistent with the five "vol"/"gestart"/"open" lines
# being the removed version and the final `return register_link_html.text`
# being the added one -- confirm against the repository before relying on it.
def _get_course_status(register_link_html):
# Presumably the older revision: map the link text onto one of three labels.
if "vol" in register_link_html.text:
return "vol"
if "gestart" in register_link_html.text:
return "gestart"
return "open"
# Presumably the newer revision: hand back the link text unchanged.
return register_link_html.text


def _get_course_table_data(course_html) -> List[Dict]:
Expand All @@ -206,7 +196,6 @@ def _get_course_table_data(course_html) -> List[Dict]:
table_data = []
for table_html, register_link_html in zip(tables_html, register_links_html):
table_dict = _extract_data_from_table(table_html)
table_dict = _rename_table_columns(table_dict)
course_status = _get_course_status(register_link_html)
table_dict["status"] = course_status
table_data.append(table_dict)
Expand Down

0 comments on commit 2092afd

Please sign in to comment.