In [None]:
import os
import sys
import json
import time
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
from dotenv import find_dotenv, load_dotenv

from datetime import datetime

# NOTE(review): absolute, machine-specific path. Presumably it is what makes the
# `src` package below importable, so this cell will not run unchanged on another
# machine -- confirm and consider deriving the path from the notebook location.
sys.path.append('/Users/vasileiosvyzas/workspace/online_courses_aid/')
from src.udemy_client import UdemyClient
from src.utils import write_data_to_json_on_disk

In [None]:
# Find the .env file and export its entries as environment variables.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Udemy API credentials; each one is None when its variable is not set.
CLIENT_ID = os.getenv("UDEMY_CLIENT_ID")
CLIENT_SECRET = os.getenv("UDEMY_CLIENT_SECRET")

# Call and test the class

In [None]:
udemy = UdemyClient()

# Fetch the course listing through the client and report how many entries came back.
courses_list = udemy.get_courses_list()
print(len(courses_list))

# Dump the listing to <parent of the working directory>/data/raw, stamped with today's date.
file_name = f'courses_list_{datetime.now().date()}.json'
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
file_location = os.path.join(parent_dir, 'data/raw', file_name)

print(f"Writing the results of the courses list in {file_location}")
write_data_to_json_on_disk(courses=courses_list, file_location=file_location)

# TODO: add courses_list in elasticsearch

# Collect the details (title, description, headline etc.) of every listed course.
course_details_list = []
course_curriculum_list = []  # stays empty while the curriculum fetch below is disabled

for course in courses_list:
    course_id = course['id']

    print(f"Getting the course details for course: {course_id}")
    course_details_list.append(udemy.get_course_details(course_id=course_id))

    # Curriculum retrieval is currently switched off:
    # print(f'Getting the curriculum for course: {course_id}')
    # course_curriculum = udemy.get_course_curriculum(course_id=course_id)
    # public_curriculum_items = {'course_id': course_id, 'curriculum': course_curriculum}
    # course_curriculum_list.append(public_curriculum_items)

# TODO: add course_details_list to elasticsearch or add the data to elasticsearch in the for loop one by one

# Dump the collected details next to the listing, again stamped with today's date.
file_name = f'courses_details_{datetime.now().date()}.json'
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
file_location = os.path.join(parent_dir, 'data/raw', file_name)
print(f"Writing the results of the courses list in {file_location}")
write_data_to_json_on_disk(courses=course_details_list, file_location=file_location)

# The matching curriculum dump is disabled together with the fetch above:
# file_name = f'courses_curriculum_{str(datetime.now().date())}.json'
# file_location = os.path.join(os.path.abspath(os.path.join(os.getcwd(), os.pardir)), 'data/raw', file_name)
# print(f"Writing the results of the courses list in {file_location}")
# write_data_to_json_on_disk(courses=course_curriculum_list, file_location=file_location)

In [None]:
udemy = UdemyClient()

# The course listing is read back from a saved dump instead of calling the API again.
# courses_list = udemy.get_courses_list()

with open('../data/raw/courses_list_2023-08-21.json', 'r') as f:
    courses_list = json.load(f)

print(len(courses_list))

# get the details (title, description, headline etc.) for each course in the list
course_details_list = []
course_curriculum_list = []

# Browser-like headers; defined once here but not sent (the `headers=` argument
# of the request below is commented out).
HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
}

# Throttling knobs for the loop below.
MAX_RATE_LIMIT_RETRIES = 3      # extra attempts for one course after an HTTP 429
RATE_LIMIT_WAIT_SECONDS = 120   # pause after every HTTP 429
REQUEST_PAUSE_SECONDS = 5       # pause after every successful request

for course_number, course in enumerate(courses_list):
    course_id = course['id']

    course_details_endpoint = f"https://www.udemy.com/api-2.0/courses/{course_id}/?fields[course]=title,headline,description,url,visible_instructors,primary_category,primary_subcategory,status_label"

    print(f"Making a call for course number: {course_number} with ID: {course_id}")

    # Retry the same course while the API answers 429, so a rate-limited course
    # is not lost and its error body is never stored as course details.
    for _attempt in range(MAX_RATE_LIMIT_RETRIES + 1):
        course_details = requests.get(
            course_details_endpoint,
            auth=HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET),
            # headers=HEADERS
        )

        print(course_details)
        print(course_details.status_code)

        if course_details.status_code != 429:
            break

        # Always back off after a 429, even on the last attempt, so the next
        # course does not hit the API while it is still rate limited.
        time.sleep(RATE_LIMIT_WAIT_SECONDS)

    # Only a 200 response carries course details. Anything else (403, a 429 that
    # outlived its retries, other errors) is skipped, and its body is not parsed
    # because it is not guaranteed to be JSON.
    if course_details.status_code != 200:
        print(f"Skipping course {course_id}: HTTP {course_details.status_code}")
        continue

    course_details_payload = course_details.json()
    print(course_details_payload)

    print(f'Returning the details for course {course_id}')

    course_details_list.append(course_details_payload)
    time.sleep(REQUEST_PAUSE_SECONDS)
    print()

# write first set of results on disk
file_name = f'courses_details_{str(datetime.now().date())}.json'
file_location = os.path.join(os.path.abspath(os.path.join(os.getcwd(), os.pardir)), 'data/raw', file_name)
print(f"Writing the results of the courses list in {file_location}")
write_data_to_json_on_disk(courses=course_details_list, file_location=file_location)

In [None]:
# Sanity check: number of records currently held in course_details_list.
len(course_details_list)

In [None]:
# Read back the course details saved in the 2023-08-26 dump.
with open('../data/raw/courses_details_2023-08-26.json') as details_file:
    initial_courses = json.load(details_file)

In [None]:
# Display the loaded records.
# NOTE(review): this renders the entire list; a slice such as initial_courses[:5]
# would keep the cell output short.
initial_courses

In [None]:
# Ids of every course whose details were already saved in the loaded dump.
course_ids_first_run = [saved_course['id'] for saved_course in initial_courses]

In [None]:
udemy = UdemyClient()

# The course listing is read back from a saved dump instead of calling the API again.
# courses_list = udemy.get_courses_list()

with open('../data/raw/courses_list_2023-08-21.json', 'r') as f:
    courses_list = json.load(f)

print(len(courses_list))

# get the details (title, description, headline etc.) for each course in the list
course_details_list = []
course_curriculum_list = []

# Throttling knobs for the loop below.
MAX_RATE_LIMIT_RETRIES = 3      # extra attempts for one course after an HTTP 429
RATE_LIMIT_WAIT_SECONDS = 120   # pause after every HTTP 429
REQUEST_PAUSE_SECONDS = 2       # pause after every successful request

for course_number, course in enumerate(courses_list):
    course_id = course['id']

    # Courses already covered by the earlier dump are not requested again.
    if course_id in course_ids_first_run:
        print('This id is in the initial list of courses acquired yesterday')
        continue

    course_details_endpoint = f"https://www.udemy.com/api-2.0/courses/{course_id}/?fields[course]=title,headline,description,url,visible_instructors,primary_category,primary_subcategory,status_label"

    print(f"Making a call for course number: {course_number} with ID: {course_id}")

    # Retry the same course while the API answers 429, so a rate-limited course
    # is not lost and its error body is never stored as course details.
    for _attempt in range(MAX_RATE_LIMIT_RETRIES + 1):
        course_details = requests.get(
            course_details_endpoint,
            auth=HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET),
            # headers=HEADERS
        )

        print(course_details)
        print(course_details.status_code)

        if course_details.status_code != 429:
            break

        # Always back off after a 429, even on the last attempt, so the next
        # course does not hit the API while it is still rate limited.
        time.sleep(RATE_LIMIT_WAIT_SECONDS)

    # Only a 200 response carries course details. Anything else (403, a 429 that
    # outlived its retries, other errors) is skipped, and its body is not parsed
    # because it is not guaranteed to be JSON.
    if course_details.status_code != 200:
        print(f"Skipping course {course_id}: HTTP {course_details.status_code}")
        continue

    course_details_payload = course_details.json()
    print(course_details_payload)

    print(f'Returning the details for course {course_id}')

    course_details_list.append(course_details_payload)
    time.sleep(REQUEST_PAUSE_SECONDS)
    print()

In [None]:
# Sanity check: number of records collected by the loop above.
len(course_details_list)

In [None]:
# Dump the details collected above to <parent of the working directory>/data/raw.
# NOTE(review): the file-name pattern is the same one the earlier details dump
# uses, so running both on the same date writes to the same path -- confirm
# that overwriting is intended.
file_name = f'courses_details_{datetime.now().date()}.json'
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
file_location = os.path.join(parent_dir, 'data/raw', file_name)
print(f"Writing the results of the courses list in {file_location}")
write_data_to_json_on_disk(courses=course_details_list, file_location=file_location)

In [None]:
# course_ids_first_run holds course ids, so add the id of each newly fetched
# record rather than the whole detail dict. Records without an 'id' key
# (e.g. an error payload) are ignored instead of raising KeyError.
course_ids_first_run.extend(
    details['id'] for details in course_details_list if 'id' in details
)

In [None]:
# Sanity check: size of course_ids_first_run after the extend above.
len(course_ids_first_run)