# In [None]:
import os
import requests
from datetime import datetime
import time
import pandas as pd

# Remote folder that hosts the monthly rating spreadsheets
base_url = "http://universalrating.com/downloads/"
# Local folder the downloaded files are written to; customize according to your need
download_dir = r"C:\Users\vlady\Desktop\Chess portfolio"

# Browser-like headers sent with the download request so that it is not
# flagged as unauthorized access.
# NOTE(review): the "If-Range" value looks like a validator captured from a
# single browser session - confirm it is still wanted for every request.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9,it-IT;q=0.8,it;q=0.7,ro;q=0.6,pl;q=0.5",
    "Connection": "keep-alive",
    "Dnt": "1",
    "Host": "universalrating.com",
    "If-Range": '"64ce99e2-27ebaf2"',
    "Referer": "http://universalrating.com/downloads.php",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Mobile Safari/537.36",
}

# Reference point for the "Time taken" report printed at the end of the run
start_time = time.time()

# Function that generates the URL to download from by appending the file name formatted for a given month (the current month by default)
def generate_excel_url(when=None, base=None):
    """Build the download URL of the rating list for one month.

    Args:
        when: Object with a ``strftime`` method (e.g. a ``datetime``) whose
            year and month select the list. Defaults to ``datetime.now()``,
            so calling without arguments targets the current month.
        base: URL prefix the file name is appended to. Defaults to the
            module-level ``base_url``.

    Returns:
        ``base`` followed by ``"<YYYY>_<MM>_URatingList.xlsx"``.
    """
    if when is None:
        when = datetime.now()
    if base is None:
        # Looked up at call time so the module-level setting stays the default
        base = base_url
    return base + when.strftime("%Y_%m_URatingList.xlsx")

# Downloads the file behind a URL to disk, reporting request failures instead of raising them
def download_excel_file(url, local_path, timeout=30):
    """Download ``url`` and save the response body to ``local_path``.

    The request is sent with the module-level ``headers``. Request failures
    (connection problems, timeouts, non-success status codes) are printed
    and reported through the return value rather than raised.

    Args:
        url: Address of the file to fetch.
        local_path: Destination file path; opened in binary write mode.
        timeout: Value forwarded to ``requests.get(timeout=...)``. Without
            one, a server that stops responding would block the call
            indefinitely.

    Returns:
        True if the file was downloaded and written, False if the request
        failed.

    Raises:
        OSError: If ``local_path`` cannot be opened or written; file errors
            are not caught here.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Raise an exception for 4xx/5xx status codes
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("An error occurred:", e)
        if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
            print("HTTP Error:", e.response.status_code)
        return False

    print("Download request was successful.")
    with open(local_path, "wb") as file:
        file.write(response.content)
    return True


if __name__ == "__main__":
    # Work out which file to fetch and where it will be stored
    excel_url = generate_excel_url()
    filename = excel_url.rsplit("/", 1)[-1]
    local_path = os.path.join(download_dir, filename)

    print(f"Generated URL: {excel_url}")

    if not download_excel_file(excel_url, local_path):
        print(f"Failed to download file '{filename}'.")
    else:
        print(f"File '{filename}' is downloaded locally at {local_path}.")

        # Read only the columns of interest from Sheet1 of the workbook
        selected_columns = ['FIDE_PlayerCode', 'PlayerName', 'URating']
        sheet_name = 'Sheet1'
        urs_ratings_df = pd.read_excel(
            local_path,
            sheet_name=sheet_name,
            usecols=selected_columns,
        )

        # 'FIDE_PlayerCode' becomes 'FideID' so that URS ratings can later be
        # matched to Elo ratings from chess-results
        urs_ratings_df = urs_ratings_df.rename(
            columns={"FIDE_PlayerCode": "FideID"})

        # Keep a pickled copy of the DataFrame for future use
        pickle_filename = os.path.join(download_dir, "urs_ratings.pkl")
        urs_ratings_df.to_pickle(pickle_filename)
        print(f"DataFrame saved as pickle: {pickle_filename}")

        # Elapsed time is measured from the module-level start_time
        end_time = time.time()
        print(f"Time taken: {end_time - start_time:.2f} seconds")