In [1]:
# imports
import os
import pickle
import tempfile

import pandas as pd
import google.auth.exceptions
import google.auth.transport.requests
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload

In [2]:
# stored information: replace every placeholder below before running the notebook
client_secret_path = "TODO - replace with the path to your client secret"  # client secret file handed to authenticate()
test_folder_destination = "TODO - replace with your test folder's ID. this is what is currently being used in the script"  # folder main() uploads into
actual_folder_destination = "TODO - replace with your actual folder's ID"  # not referenced by main() as written

In [3]:
# load in data: the links from the CSV are kept under "OLD Resource Link", and
# two empty columns are appended to hold the links of the re-uploaded copies
df = (
    pd.read_csv("download_resources_COLAB.csv")
    .rename(columns={"Resource Link": "OLD Resource Link"})
    .assign(**{"Resource Link": "", "Resource": ""})
)
df

Unnamed: 0,Computing Topics,Context Topics,Libraries Used,Language,Level,Last Updated,OLD Resource Link,Source Institution,Resource Link,Resource
0,"programmatic arithmetic, using variables","calculate cost of purchases, tax, entrepreneur...",,Python,CS1,2023,https://colab.research.google.com/drive/1hDc7m...,Stony Brook University,,
1,"programmatic arithmetic, using variables",calculate monthly lease payment for a car,,Python,CS1,2023,https://colab.research.google.com/drive/1g5R7y...,Stony Brook University,,
2,"programmatic arithmetic, using variables, unit...",calculate time to fill a car's gas tank,,Python,CS1,2023,https://colab.research.google.com/drive/1yduCM...,Stony Brook University,,
3,"programmatic arithmetic, using variables",calculate calories burned when excercising,,Python,CS1,2023,https://colab.research.google.com/drive/1EOkyY...,Stony Brook University,,
4,"programmatic arithmetic, using variables","geometry, cemetery headstones",math,Python,CS1,2023,https://colab.research.google.com/drive/1JydLf...,Stony Brook University,,
...,...,...,...,...,...,...,...,...,...,...
286,"dataframe basics, visualization, data cleaning",life expectancy,"pandas, seaborn, matplotlib",Python,CS1,2023,https://colab.research.google.com/drive/1p6Llj...,Stony Brook University,,
287,"dataframe basics, visualization, data cleaning","NYC Airbnbs, hotels",pandas,Python,CS1,2023,https://colab.research.google.com/drive/1ZB-gy...,Stony Brook University,,
288,"dataframe basics, data cleaning, visualization",Seattle housing market,pandas,Python,CS1,2023,https://colab.research.google.com/drive/1gW19i...,Stony Brook University,,
289,"dataframe basics, data cleaning, visualization",cereal,pandas,Python,CS1,2023,https://colab.research.google.com/drive/1blUn4...,Stony Brook University,,


In [7]:
# define scope: passed to the OAuth login flow inside authenticate()
SCOPES = ["https://www.googleapis.com/auth/drive"]

# original links of resources whose download/re-upload failed (filled in by download_and_reupload_COLAB)
manual_upload = list()

def process_string(input_str):
    """ Cleans up a resource name by applying every clean-up rule in turn.

    Rules (all of them are applied, so a name matching more than one rule is fully cleaned):
        1. every './' substring is removed
        2. the title "Evaluate f(x,y,z) =(y + 1)/((x^3)(z + 2)) - 3" is replaced with
           the generic description "Evaluate Mathematical Equation"

    Args:
        input_str (str): The input string to be processed.

    Returns:
        str: processed string (equal to the input when no rule matches)
    """
    equation_title = "Evaluate f(x,y,z) =(y + 1)/((x^3)(z + 2)) - 3"

    # str.replace leaves the string untouched when the pattern is absent,
    # so no membership checks (and no early returns) are needed
    cleaned = input_str.replace('./', '')
    return cleaned.replace(equation_title, "Evaluate Mathematical Equation")

def authenticate(client_secret_path):
    """ Authenticates the user using OAuth 2.0 and returns valid credentials.

    Saved credentials are read from `token.pickle` in the current working directory.
    If they are still valid they are returned as-is. If they are expired but carry a
    refresh token, a refresh is attempted; when that refresh is rejected the function
    falls back to a fresh login instead of raising. In every other case the user is
    asked to log in via the installed application flow. New or refreshed credentials
    are written back to `token.pickle` for future use.

    Args:
        client_secret_path (str): path to the client secret file used for the login flow.

    Returns:
        google.auth.credentials.Credentials: Valid credentials for accessing Google services.
    """
    creds = None
    # token.pickle file stores the user's access and refresh tokens
    # SECURITY NOTE: unpickling can execute arbitrary code, so only keep a
    # token.pickle that this notebook wrote itself
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    # saved credentials are still good: nothing to do
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(google.auth.transport.requests.Request())
        except google.auth.exceptions.RefreshError as e:
            # e.g. the refresh token was revoked; a new login is the only way forward
            print(f"Could not refresh saved credentials ({e}); starting a new login")
            creds = None
    else:
        creds = None

    # if there are no usable credentials, let user log in
    if creds is None:
        flow = InstalledAppFlow.from_client_secrets_file(
            client_secret_path, SCOPES)
        creds = flow.run_local_server(port=0)

    # save credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

    return creds

def download_and_reupload_COLAB(file_id, drive_service, destination_folder_id, og_link):
    """ Downloads a file from Google Drive and re-uploads a copy into the destination folder

    The download is written to a temporary file instead of a path built from the
    file's name, so names containing characters such as '/' cannot break the local
    write. The temporary file is removed whether or not the copy succeeds.

    Failures are handled best-effort: the error is printed, `og_link` is appended to
    the module-level `manual_upload` list, and a sentinel string is returned instead
    of raising.

    Args:
        file_id (str): original file ID
        drive_service (googleapiclient.discovery.Resource): drive service built in main()
        destination_folder_id (str): folder ID of folder where resources will be uploaded
        og_link (str): original link of resource

    Returns:
        if download/upload is successful: list of [create() response, processed file name, new file id]
        if download/upload is unsuccessful: the string "CANNOT UPLOAD"
    """
    file_name = None  # stays None when the metadata lookup itself fails
    file_path = None  # stays None until the temporary file exists
    try:
        # get file metadata to fetch file name
        file_metadata = drive_service.files().get(fileId=file_id).execute()
        file_name = file_metadata['name']

        # download file from Google Drive into a temporary file
        request = drive_service.files().get_media(fileId=file_id)
        fd, file_path = tempfile.mkstemp(suffix=".ipynb")
        with os.fdopen(fd, 'wb') as file:
            downloader = MediaIoBaseDownload(file, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()

        # re-upload the file as a Google Colab notebook
        file_metadata = {
            'name': file_name,  # keep the original file name
            'parents': [destination_folder_id],
            'mimeType': 'application/vnd.google.colaboratory'  # set MIME type for Colab (uploads file as a Colab file)
        }
        media = MediaFileUpload(file_path, mimetype='application/json')  # set as JSON
        uploaded_file = drive_service.files().create(
            body=file_metadata, media_body=media, fields='id'
        ).execute()
        print(f"File re-uploaded successfully as a Colab notebook: {file_metadata['name']} - {uploaded_file['id']}")

        return [uploaded_file, process_string(file_name), uploaded_file['id']]
    except Exception as e:
        # deliberate best-effort: record the link for manual upload and keep going
        manual_upload.append(og_link)
        print(f"Error in download/re-upload: {e}")
        if file_name is not None:
            print(f"     {process_string(file_name)}")
        return "CANNOT UPLOAD"
    finally:
        # clean up local file on success and on failure alike
        if file_path is not None and os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {file_path}: {cleanup_error}")

def duplicate_file_COLAB(df, file_id, destination_folder_id, drive_service, idx):
    """ Copies one resource and records the new links in the dataframe to be saved and uploaded to website

    When the copy fails, the row's "Resource" and "Resource Link" cells are left
    untouched so that no bogus link is written for that resource.

    Args:
        df (pandas DataFrame): dataframe that stores necessary information for each resource
        file_id (str): original file ID
        destination_folder_id (str): folder ID of folder where resources will be uploaded
        drive_service (googleapiclient.discovery.Resource): drive service built in main()
        idx (int): row representing a resource
    """
    original_link = df.at[idx, "OLD Resource Link"]
    uploaded_file = download_and_reupload_COLAB(file_id, drive_service, destination_folder_id, original_link)

    # a failed copy comes back as a plain string ("CANNOT UPLOAD"); indexing it
    # would write single characters of that message into the links
    if isinstance(uploaded_file, str):
        return

    _, new_name, new_id = uploaded_file
    new_link = f"https://colab.research.google.com/drive/{new_id}"
    df.at[idx, "Resource"] = f"{new_link}||{new_name}" # this link format allows the link to be represented cleanly on the actual website
    df.at[idx, "Resource Link"] = new_link


def main():
    """ Copies every Colab resource listed in the module-level `df` into the destination folder.

    Rows whose "OLD Resource Link" is missing, is not a string, or does not contain
    "colab" are skipped. Colab links that carry no file ID after "/drive/" are
    reported and skipped instead of raising.
    """
    # authenticate and build the Drive service
    creds = authenticate(client_secret_path)
    drive_service = build('drive', 'v3', credentials=creds)
    destination_folder_id = test_folder_destination # change depending on your needs

    # download and re-upload all files
    for idx in df.index:
        link = df.at[idx, "OLD Resource Link"]
        # empty CSV cells are read as NaN (a float), which cannot be searched with `in`
        if not isinstance(link, str) or "colab" not in link:
            continue

        parts = link.split("/drive/")
        # the ID ends at the query string or fragment, whichever comes first
        original_file_id = parts[1].split("?")[0].split("#")[0] if len(parts) > 1 else ""
        if not original_file_id:
            print(f"Skipping row {idx}: no Drive file ID found in {link}")
            continue

        duplicate_file_COLAB(df, original_file_id, destination_folder_id, drive_service, idx)

# start the copy run only when this code executes as the main module;
# main() is not called if the code is imported from elsewhere
if __name__ == '__main__':
    main()


<class 'googleapiclient.discovery.Resource'>
File re-uploaded successfully as a Colab notebook: Calculate the Cost of a Purchase - 1oWpN5_pIyZpD44nKnfII1BwRugbIPBCu
File re-uploaded successfully as a Colab notebook: Calculate the Monthly Lease Payment for a Car - 1-_CyWCfm2RPu22BKXBzAwBegFeKDWgo4
File re-uploaded successfully as a Colab notebook: Calculate the Time to Fill a Car's Gas Tank - 1Q5R9VCXX09qzXa0add0ZpWNmUCvmMyzC
File re-uploaded successfully as a Colab notebook: Calorie Calculator - 1PEt8RJ-IaPuP1gI5bd8kBaoD9GueloLa
File re-uploaded successfully as a Colab notebook: Cemetery Headstone - 1XOB7q0aoE-RC-E0aVuukP6fMDSVh0nIy
File re-uploaded successfully as a Colab notebook: Compute the Basal Metabolic Rate - no functions - 1pnOleGTYwhIF9ansbArQVKflmB-5qiuU
File re-uploaded successfully as a Colab notebook: Evaluate $\frac{x(y-1)^4}{3^x} - \frac{(xy)^3}{4}$ - 18UZvZbnNzQIh3DaeoANqKTx8qGc5tKIc
File re-uploaded successfully as a Colab notebook: Evaluate $f(x,y)=\frac{4^y}{xy^2+1}

KeyboardInterrupt: 

In [8]:
# check output: rows that were copied now have "Resource Link" and "Resource" filled in
df

Unnamed: 0,Computing Topics,Context Topics,Libraries Used,Language,Level,Last Updated,OLD Resource Link,Source Institution,Resource Link,Resource
0,"programmatic arithmetic, using variables","calculate cost of purchases, tax, entrepreneur...",,Python,CS1,2023,https://colab.research.google.com/drive/1hDc7m...,Stony Brook University,https://colab.research.google.com/drive/1oWpN5...,https://colab.research.google.com/drive/1oWpN5...
1,"programmatic arithmetic, using variables",calculate monthly lease payment for a car,,Python,CS1,2023,https://colab.research.google.com/drive/1g5R7y...,Stony Brook University,https://colab.research.google.com/drive/1-_CyW...,https://colab.research.google.com/drive/1-_CyW...
2,"programmatic arithmetic, using variables, unit...",calculate time to fill a car's gas tank,,Python,CS1,2023,https://colab.research.google.com/drive/1yduCM...,Stony Brook University,https://colab.research.google.com/drive/1Q5R9V...,https://colab.research.google.com/drive/1Q5R9V...
3,"programmatic arithmetic, using variables",calculate calories burned when excercising,,Python,CS1,2023,https://colab.research.google.com/drive/1EOkyY...,Stony Brook University,https://colab.research.google.com/drive/1PEt8R...,https://colab.research.google.com/drive/1PEt8R...
4,"programmatic arithmetic, using variables","geometry, cemetery headstones",math,Python,CS1,2023,https://colab.research.google.com/drive/1JydLf...,Stony Brook University,https://colab.research.google.com/drive/1XOB7q...,https://colab.research.google.com/drive/1XOB7q...
...,...,...,...,...,...,...,...,...,...,...
286,"dataframe basics, visualization, data cleaning",life expectancy,"pandas, seaborn, matplotlib",Python,CS1,2023,https://colab.research.google.com/drive/1p6Llj...,Stony Brook University,,
287,"dataframe basics, visualization, data cleaning","NYC Airbnbs, hotels",pandas,Python,CS1,2023,https://colab.research.google.com/drive/1ZB-gy...,Stony Brook University,,
288,"dataframe basics, data cleaning, visualization",Seattle housing market,pandas,Python,CS1,2023,https://colab.research.google.com/drive/1gW19i...,Stony Brook University,,
289,"dataframe basics, data cleaning, visualization",cereal,pandas,Python,CS1,2023,https://colab.research.google.com/drive/1blUn4...,Stony Brook University,,


In [9]:
# reformat df to match format of table on website
# ("Resource Link" is not selected: it is not part of the exported table)
df_reordered = df.loc[:, [
    "Resource",
    "Computing Topics",
    "Context Topics",
    "Language",
    "Level",
    "Libraries Used",
    "Last Updated",
    "Source Institution",
]]

# save to excel file
df_reordered.to_excel('Computing_in_Context_Resources_COLAB.xlsx', index=False)

# preview goes last: only the final expression of a cell is displayed
df_reordered.head()

Unnamed: 0,Resource,Computing Topics,Context Topics,Language,Level,Libraries Used,Last Updated,Source Institution
0,https://colab.research.google.com/drive/1oWpN5...,"programmatic arithmetic, using variables","calculate cost of purchases, tax, entrepreneur...",Python,CS1,,2023,Stony Brook University
1,https://colab.research.google.com/drive/1-_CyW...,"programmatic arithmetic, using variables",calculate monthly lease payment for a car,Python,CS1,,2023,Stony Brook University
2,https://colab.research.google.com/drive/1Q5R9V...,"programmatic arithmetic, using variables, unit...",calculate time to fill a car's gas tank,Python,CS1,,2023,Stony Brook University
3,https://colab.research.google.com/drive/1PEt8R...,"programmatic arithmetic, using variables",calculate calories burned when excercising,Python,CS1,,2023,Stony Brook University
4,https://colab.research.google.com/drive/1XOB7q...,"programmatic arithmetic, using variables","geometry, cemetery headstones",Python,CS1,math,2023,Stony Brook University


In [10]:
# files that required manual upload on initial run of main()
"""https://colab.research.google.com/drive/17X4JvlhhT6_GdCSdQ263Jhu65PVMCG9o?authuser=2
      https://colab.research.google.com/drive/1-nVJEOfR9XOuyKmpQqSjJlawS6uaHVZx?authuser=2
      https://colab.research.google.com/drive/17OurVO4fXdcslY50_gzsw_7QemtvGJga?authuser=2
      https://colab.research.google.com/drive/1kJ18kupuA6KKapwZaaVRA_v1I5BrBYp1?authuser=2
      https://colab.research.google.com/drive/1Is6bjj4U1yRhgrygElYlgfSrh3NbBtgF?authuser=2
      https://colab.research.google.com/drive/1VRnKbUeLZ_1npIrVIPp39zxw2HBMQphS?authuser=2
      https://colab.research.google.com/drive/1e8NuGFblNrUppm2ZAQ5TJemnFi2rs6EM?authuser=2
      https://colab.research.google.com/drive/1NKY9Nii3rlJ7chcZAu8mxONP5PPv8QJ_?authuser=2"""

# list every link that still requires manual upload, one per line
for failed_link in manual_upload:
    print(failed_link)