In [3]:
import os
from pathlib import Path
import io
import zipfile
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File

# --- Configuration ---
# Root URL of the SharePoint site that hosts the document library.
SHAREPOINT_SITE_URL = "https://informatiquesytral.sharepoint.com/sites/dppo-ext/"

# Credentials are read from the environment so they never live in the source;
# either value is None when its variable is not set.
# IMPORTANT: for production, prefer an Azure App Registration
# (Client ID/Secret) over a user name and password.
SHAREPOINT_USERNAME = os.getenv("SHAREPOINT_USER")
SHAREPOINT_PASSWORD = os.getenv("SHAREPOINT_PASSWORD")

# Folder path *inside* the SharePoint site, relative to the site's root,
# e.g. "Shared Documents/MyFolder/ZipFiles" or "Documents partages/Indispo_asc_esc".
SHAREPOINT_FOLDER_PATH = "Documents partages/Indispo_asc_esc"

# Local directories (relative to the current working directory) passed to the
# download/extract routine as its download and extraction targets.
DOWNLOAD_LOCATION = Path("./downloaded_zip_files")
EXTRACT_LOCATION = Path("./extracted_zip_contents")


def _extract_zip(archive_path, archive_name, destination):
    """Extract the archive at *archive_path* into *destination*, reporting the outcome."""
    try:
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            # Create the target directory only once the archive has opened,
            # so an invalid ZIP does not leave an empty directory behind.
            destination.mkdir(parents=True, exist_ok=True)
            zip_ref.extractall(destination)
        print(f"Successfully extracted {archive_name}")
    except zipfile.BadZipFile:
        print(f"Error: {archive_name} is not a valid ZIP file or is corrupted.")
    except Exception as e_zip:
        # Best effort: a failed extraction is reported and the caller moves on.
        print(f"Error extracting {archive_name}: {e_zip}")


def _print_error_hints(error, folder_path, site_url):
    """Print troubleshooting hints derived from the text of *error*."""
    message = str(error)
    # The needle must be lowercase because it is searched for in the
    # lowercased message; a capitalised needle could never match.
    if "401" in message or "403" in message or "unauthorized" in message.lower():
        print("This might be an authentication issue. Check your credentials and permissions.")
    if "File Not Found" in message or "404" in message:
        print(f"Ensure the folder path '{folder_path}' is correct relative to the site URL '{site_url}'.")


def download_and_extract_zip_files_from_sharepoint(
    site_url, username, password, folder_path, download_dir, extract_dir
):
    """
    Download every ZIP file in a SharePoint folder and extract each one locally.

    Each file whose name ends with ".zip" (case-insensitive) is saved to
    ``download_dir`` and extracted into ``extract_dir/<name without .zip>``.
    Progress and errors are reported with ``print``; no exception is
    propagated to the caller.

    Args:
        site_url: URL of the SharePoint site.
        username: Account name; if empty or None the function prints an
            error and returns without doing anything.
        password: Account password; same handling as ``username``.
        folder_path: Folder path within the site, relative to the site root.
        download_dir: ``pathlib.Path`` directory for the downloaded archives
            (created if missing).
        extract_dir: ``pathlib.Path`` directory under which each archive gets
            its own sub-directory (created if missing).

    Returns:
        None.
    """
    if not username or not password:
        print(
            "Error: SHAREPOINT_USER and/or SHAREPOINT_PASSWORD environment variables not set."
        )
        print("Please set these variables before running the script.")
        return

    try:
        # Create local directories if they don't exist
        download_dir.mkdir(parents=True, exist_ok=True)
        extract_dir.mkdir(parents=True, exist_ok=True)

        # Build the client context with user credentials.
        # NOTE(review): no query has been executed at this point, so the
        # message below may be printed before the credentials are actually
        # validated by the server - confirm against the library behaviour.
        user_credentials = UserCredential(username, password)
        ctx = ClientContext(site_url).with_credentials(user_credentials)
        print(f"Successfully authenticated to SharePoint site: {site_url}")

        # Get the target folder
        target_folder = ctx.web.get_folder_by_server_relative_url(folder_path)
        ctx.load(target_folder)
        ctx.execute_query()
        print(f"Accessing folder: {target_folder.properties['ServerRelativeUrl']}")

        # Get files from the folder
        files = target_folder.files
        ctx.load(files)
        ctx.execute_query()

        if not files:
            print(f"No files found in folder: {folder_path}")
            return

        zip_files_found = 0
        for f in files:
            if not f.name.lower().endswith(".zip"):
                continue

            zip_files_found += 1
            print(f"Found ZIP file: {f.name}")

            download_file_path = download_dir / f.name
            # Each archive is extracted into its own sub-directory, named
            # after the file with the 4-character ".zip" suffix removed.
            specific_extract_path = extract_dir / f.name[:-4]

            # Download the file
            print(f"Downloading {f.name} to {download_file_path}...")
            with open(download_file_path, "wb") as local_file:
                f.download(local_file).execute_query()
            print(f"Successfully downloaded {f.name}")

            # Extract the ZIP file
            print(f"Extracting {f.name} to {specific_extract_path}...")
            _extract_zip(download_file_path, f.name, specific_extract_path)

        if zip_files_found == 0:
            print(f"No .zip files found in the folder: {folder_path}")

    except Exception as e:
        # Top-level boundary: report the failure plus any hints, never re-raise.
        print(f"An error occurred: {e}")
        _print_error_hints(e, folder_path, site_url)


if __name__ == "__main__":
    # Before running, export the credentials in the current session, e.g. in
    # PowerShell:
    #   $env:SHAREPOINT_USER="your_email@example.com"
    #   $env:SHAREPOINT_PASSWORD="your_password"
    #
    # For production, consider an Azure AD App Registration (Client ID/Secret)
    # and a secure credential store such as Azure Key Vault.
    download_and_extract_zip_files_from_sharepoint(
        site_url=SHAREPOINT_SITE_URL,
        username=SHAREPOINT_USERNAME,
        password=SHAREPOINT_PASSWORD,
        folder_path=SHAREPOINT_FOLDER_PATH,
        download_dir=DOWNLOAD_LOCATION,
        extract_dir=EXTRACT_LOCATION,
    )


Error: SHAREPOINT_USER and/or SHAREPOINT_PASSWORD environment variables not set.
Please set these variables before running the script.
