# OneDrive Importer
This notebook will connect to a OneDrive account, list all the available files and download them for further processing by Vectrix
We use the [python-o365](https://github.com/O365/python-o365) library to connect to OneDrive.

## Connect to o365 and fetch the authentication token

In [3]:
import os
from O365 import Account, FileSystemTokenBackend

# Get credentials from environment variables
client_id = os.environ.get('AZURE_CLIENT_ID')
client_secret = os.environ.get('AZURE_CLIENT_SECRET')

if not client_id or not client_secret:
    print("Error: AZURE_CLIENT_ID or AZURE_CLIENT_SECRET environment variables are not set.")
else:
    # Set up the token backend
    token_backend = FileSystemTokenBackend(token_path='.', token_filename='o365_token.txt')

    # Create an Account object
    account = Account((client_id, client_secret), token_backend=token_backend)

    # Authenticate (if not already authenticated)
    if not account.is_authenticated:
        if account.authenticate(scopes=['onedrive_all']):
            print("Authentication successful.")
        else:
            print("Authentication failed.")

    # Function to list files and folders
    

    # Example usage
    list_onedrive_files()

    print("\nYou can list files in a specific folder by calling list_onedrive_files('folder/path')")

Files and folders in OneDrive path: Documents
File: pdf_with_images.pdf

You can list files in a specific folder by calling list_onedrive_files('folder/path')


## List OneDrive files
This function will list all existing files in the specified folder. You can pass "/" if you want to list the root folder.

In [5]:
def list_onedrive_files(folder_path='/'):
        try:
            # Get the storage instance (OneDrive)
            storage = account.storage()

            # Get the default drive (main OneDrive)
            drive = storage.get_default_drive()

            # Get the specified folder
            if folder_path == '/':
                folder = drive.get_root_folder()
            else:
                folder = drive.get_item_by_path(folder_path)

            # List files and folders
            items = folder.get_items()

            print(f"Files and folders in OneDrive path: {folder_path}")
            for item in items:
                item_type = "File" if item.is_file else "Folder"
                print(f"{item_type}: {item.name}")

        except Exception as e:
            print(f"An error occurred: {str(e)}")

list_onedrive_files(folder_path="Documents")

Files and folders in OneDrive path: Documents
File: pdf_with_images.pdf


## Download files by extension

In [34]:
def download_files_by_extension(folder_path='/', file_extension='.pdf', local_dir='./temp'):
    try:
        # Ensure the local directory exists
        os.makedirs(local_dir, exist_ok=True)

        # Get the storage instance (OneDrive)
        storage = account.storage()


        # Get the default drive (main OneDrive)
        drive = storage.get_default_drive()


        # Get the specified folder
        if folder_path == '/':
            folder = drive.get_root_folder()
        else:
            folder = drive.get_item_by_path(folder_path)

        # List files in the folder
        items = folder.get_items()

        # Download files with the specified extension
        downloaded_count = 0
        for item in items:
            if item.is_file and item.name.lower().endswith(file_extension.lower()):
                local_path = os.path.join(local_dir, item.name)
                print(item.web_url)
                print(item.get_versions())
                item.download(to_path=local_dir)
                print(f"Downloaded: {item.name}")
                downloaded_count += 1

        print(f"\nDownloaded {downloaded_count} files with extension '{file_extension}' to {local_dir}")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Example usage
# 
download_files_by_extension('Documents', '.pdf', './temp/')

https://1drv.ms/b/s!AHmU05abZem2gR0
[Version Id: current | Modified on: 2024-07-23 17:52:35.480000+02:00 | by: Ben Selleslagh]
Downloaded: pdf_with_images.pdf

Downloaded 1 files with extension '.pdf' to ./temp/
