# In [None]:
"""
OneDrive/SharePoint Data Access Module for Sentinel-2 Data
===========================================================

This module provides functions to access Sentinel-2 data stored on OneDrive/SharePoint
without downloading everything to local storage.

Prerequisites:
--------------
pip install msal requests rasterio numpy --break-system-packages

Setup:
------
1. Register an app in Azure AD (https://portal.azure.com/#blade/Microsoft_AAD_IAM/ActiveDirectoryMenuBlade/RegisteredApps)
2. Get your Client ID, Tenant ID, and create a Client Secret
3. Grant the app *application* permissions (with admin consent): Sites.Read.All, Files.Read.All
4. Store credentials securely (use environment variables or config file)
"""

import io
import os
import tempfile
from urllib.parse import quote, unquote, urlsplit

import rasterio
import requests
from msal import ConfidentialClientApplication
from rasterio.io import MemoryFile


class OneDriveAccess:
    """Handle authentication and file access to OneDrive/SharePoint.

    An access token for Microsoft Graph is acquired once, when the object is
    created, using the app's client credentials. The token is not refreshed
    automatically; call ``authenticate()`` again to obtain a fresh one.
    """

    # Seconds passed as ``timeout`` to every HTTP request so that a stalled
    # connection raises instead of blocking forever.
    REQUEST_TIMEOUT = 60

    def __init__(self, tenant_id, client_id, client_secret):
        """
        Initialize OneDrive access with Azure AD credentials

        Parameters:
        -----------
        tenant_id : str
            Your Azure AD tenant ID
        client_id : str
            Your registered app's client ID
        client_secret : str
            Your app's client secret

        Raises:
        -------
        Exception
            If authentication fails (see ``authenticate``).
        """
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.token = None
        self.authenticate()

    def authenticate(self):
        """
        Authenticate with Microsoft Graph API and store the access token

        Raises:
        -------
        Exception
            If the token response contains no access token.
        """
        authority = f"https://login.microsoftonline.com/{self.tenant_id}"
        scope = ["https://graph.microsoft.com/.default"]

        app = ConfidentialClientApplication(
            self.client_id,
            authority=authority,
            client_credential=self.client_secret
        )

        # Try the token cache first, then fall back to a fresh request.
        result = app.acquire_token_silent(scope, account=None)
        if not result:
            result = app.acquire_token_for_client(scopes=scope)

        if "access_token" in result:
            self.token = result["access_token"]
            print("‚úì Successfully authenticated with Microsoft Graph API")
        else:
            raise Exception(f"Authentication failed: {result.get('error_description')}")

    def get_headers(self):
        """Get authorization headers for API requests"""
        return {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/json"
        }

    @staticmethod
    def _quote_path(path):
        """Percent-encode a path for use inside a Graph URL.

        "/" is kept as the segment separator. "%" is left untouched so that
        callers who pass an already percent-encoded path keep working.
        """
        return quote(path, safe="/%")

    @staticmethod
    def _select_document_drive(drives):
        """Return the document-library drive from a non-empty list of drives.

        Matches both the English ("Documents") and German ("Dokumente")
        spelling; falls back to the first drive when nothing matches.
        """
        for drive in drives:
            name = (drive.get("name") or "").lower()
            if "document" in name or "dokument" in name:
                return drive
        return drives[0]

    def parse_sharepoint_url(self, sharepoint_url):
        """
        Parse SharePoint URL to extract the site name

        Example URL:
        https://wu.sharepoint.com/:f:/r/sites/DSLab2025-infrared.city/Freigegebene%20Dokumente/Sentinel-2%20Data

        Parameters:
        -----------
        sharepoint_url : str
            URL whose path contains a ``sites/<site name>`` segment pair

        Returns:
        --------
        str
            The percent-decoded site name (query string and fragment ignored)

        Raises:
        -------
        ValueError
            If the URL path has no non-empty segment following ``sites``.
        """
        # Only the path is inspected, so "?query" and "#fragment" parts can
        # never leak into the returned site name.
        segments = urlsplit(sharepoint_url).path.split("/")
        for position, segment in enumerate(segments[:-1]):
            if segment == "sites":
                site_name = unquote(segments[position + 1])
                if site_name:
                    return site_name
                break
        raise ValueError("Invalid SharePoint URL format")

    def get_site_id(self, site_name, hostname="wu.sharepoint.com"):
        """
        Get the site ID from site name

        Parameters:
        -----------
        site_name : str
            Site name as it appears after ``/sites/`` in the SharePoint URL
        hostname : str, optional
            SharePoint host of the tenant (default: "wu.sharepoint.com")

        Returns:
        --------
        str
            The ``id`` field of the Graph site resource

        Raises:
        -------
        Exception
            If the request does not return HTTP 200.
        """
        url = (
            "https://graph.microsoft.com/v1.0/sites/"
            f"{hostname}:/sites/{self._quote_path(site_name)}"
        )
        response = requests.get(
            url, headers=self.get_headers(), timeout=self.REQUEST_TIMEOUT
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get site ID: {response.text}")
        return response.json()["id"]

    def list_drive_items(self, site_id, folder_path="Sentinel-2 Data"):
        """
        List items in a SharePoint folder

        Parameters:
        -----------
        site_id : str
            SharePoint site ID
        folder_path : str
            Path to folder (e.g., "Sentinel-2 Data" or "Sentinel-2 Data/Amsterdam").
            An empty path lists the root of the drive.

        Returns:
        --------
        list
            Item dictionaries from all result pages of the folder listing

        Raises:
        -------
        Exception
            If the site has no drives or a request does not return HTTP 200.
        """
        # Get drive ID (usually "Documents" drive)
        drive_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
        response = requests.get(
            drive_url, headers=self.get_headers(), timeout=self.REQUEST_TIMEOUT
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get drives: {response.text}")

        drives = response.json()["value"]
        if not drives:
            raise Exception("Failed to get drives: the site has no drives")
        drive_id = self._select_document_drive(drives)["id"]

        drive_root = (
            f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root"
        )
        relative_path = (folder_path or "").strip("/")
        if relative_path:
            next_url = f"{drive_root}:/{self._quote_path(relative_path)}:/children"
        else:
            next_url = f"{drive_root}/children"

        # Follow "@odata.nextLink" so large folders are not cut off after the
        # first page of results.
        items = []
        while next_url:
            response = requests.get(
                next_url, headers=self.get_headers(), timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                raise Exception(f"Failed to list items: {response.text}")
            page = response.json()
            items.extend(page.get("value", []))
            next_url = page.get("@odata.nextLink")
        return items

    def download_file(self, download_url, local_path=None):
        """
        Download a file from OneDrive/SharePoint

        Parameters:
        -----------
        download_url : str
            The @microsoft.graph.downloadUrl from the file item
        local_path : str, optional
            Local path to save file. If None, returns file content as bytes

        Returns:
        --------
        bytes or str
            File content as bytes if local_path is None, otherwise local file path

        Raises:
        -------
        Exception
            If the request does not return HTTP 200.
        """
        # The download URL is requested without the Authorization header.
        response = requests.get(download_url, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        if not local_path:
            return response.content

        # A bare file name has an empty dirname, and os.makedirs("") raises.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        return local_path

    def open_raster_from_onedrive(self, download_url):
        """
        Open a raster file (GeoTIFF) directly from OneDrive without downloading to disk

        The complete file is fetched into memory before it is opened.

        Parameters:
        -----------
        download_url : str
            The @microsoft.graph.downloadUrl from the file item

        Returns:
        --------
        rasterio.DatasetReader
            Opened raster dataset
        """
        # Download file content
        content = self.download_file(download_url)

        # Open in memory
        # NOTE(review): the MemoryFile is never closed explicitly here; confirm
        # in the rasterio docs that closing the returned dataset is enough to
        # release the in-memory buffer.
        memfile = MemoryFile(content)
        return memfile.open()


# =============================================================================
# Example Usage Functions
# =============================================================================

def setup_credentials():
    """
    Setup Azure AD credentials

    The environment variables AZURE_TENANT_ID, AZURE_CLIENT_ID and
    AZURE_CLIENT_SECRET are used when all three are set. Otherwise the first
    three lines of 'azure_config.txt' in the current working directory are
    used (tenant ID, client ID, client secret, in that order).

    IMPORTANT: Store these securely! Options:
    1. Environment variables (recommended for production)
    2. Config file (add to .gitignore)
    3. Azure Key Vault (most secure)

    Returns:
    --------
    tuple
        (tenant_id, client_id, client_secret), or (None, None, None) when the
        config file is missing or has fewer than three lines.
    """
    # Option 1: From environment variables
    tenant_id = os.getenv("AZURE_TENANT_ID")
    client_id = os.getenv("AZURE_CLIENT_ID")
    client_secret = os.getenv("AZURE_CLIENT_SECRET")
    if all([tenant_id, client_id, client_secret]):
        return tenant_id, client_id, client_secret

    # Option 2: From config file (create this file and add to .gitignore!)
    try:
        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # become part of the tenant ID on the first line.
        with open("azure_config.txt", "r", encoding="utf-8-sig") as f:
            lines = [line.strip() for line in f]
    except FileNotFoundError:
        print("Please create 'azure_config.txt' with your credentials:")
        print("Line 1: TENANT_ID")
        print("Line 2: CLIENT_ID")
        print("Line 3: CLIENT_SECRET")
        return None, None, None

    # A truncated file is reported like a missing one instead of failing
    # with an IndexError.
    if len(lines) < 3:
        print("'azure_config.txt' is incomplete; it must contain three lines:")
        print("Line 1: TENANT_ID")
        print("Line 2: CLIENT_ID")
        print("Line 3: CLIENT_SECRET")
        return None, None, None

    return lines[0], lines[1], lines[2]


def example_list_sentinel_data():
    """
    Example: print the contents of the Sentinel-2 Data folder

    Returns:
    --------
    tuple or None
        (OneDriveAccess instance, site ID) on success; None when no
        credentials are configured.
    """
    credentials = setup_credentials()
    if not all(credentials):
        print("‚ùå Credentials not configured")
        return

    # Connect, then resolve the site the data lives on
    client = OneDriveAccess(*credentials)
    site = client.get_site_id("DSLab2025-infrared.city")

    # NOTE(review): the path starts with the library name
    # ("Freigegebene Dokumente"); confirm that is what the drive root expects.
    entries = client.list_drive_items(site, "Freigegebene Dokumente/Sentinel-2 Data")

    print("\nüìÇ Sentinel-2 Data Contents:")
    print("=" * 60)
    for entry in entries:
        # Items carrying a "folder" key get the folder icon, all others the file icon
        if "folder" in entry:
            icon = "üìÅ"
        else:
            icon = "üìÑ"
        print(f"{icon} {entry['name']}")

    return client, site


def example_load_amsterdam_data(od, site_id):
    """
    Example: list the Amsterdam folder and inspect its stacked rasters

    Every '.tif' file is printed. Files whose name contains "stack"
    (case-insensitive) are additionally opened in memory and their shape,
    band count and CRS are printed.

    Parameters:
    -----------
    od : OneDriveAccess
        Authenticated access object
    site_id : str
        SharePoint site ID
    """
    amsterdam_folder = "Freigegebene Dokumente/Sentinel-2 Data/Amsterdam"
    entries = od.list_drive_items(site_id, amsterdam_folder)

    print("\nüìÇ Amsterdam Folder Contents:")
    print("=" * 60)
    for entry in entries:
        filename = entry['name']
        if not filename.endswith('.tif'):
            continue
        print(f"üìÑ {filename}")

        # Only the stacked rasters are opened
        if "stack" not in filename.lower():
            continue
        print(f"   Opening {filename}...")
        raster_url = entry['@microsoft.graph.downloadUrl']

        with od.open_raster_from_onedrive(raster_url) as raster:
            print(f"   Shape: {raster.shape}")
            print(f"   Bands: {raster.count}")
            print(f"   CRS: {raster.crs}")


if __name__ == "__main__":
    # Running the module directly only prints a short usage guide.
    usage_lines = (
        "OneDrive/SharePoint Access Module for Sentinel-2 Data",
        "=" * 60,
        "\nTo use this module:",
        "1. Set up Azure AD app registration",
        "2. Configure credentials (see setup_credentials())",
        "3. Import and use the OneDriveAccess class",
        "\nExample:",
        "  from onedrive_access import OneDriveAccess",
        "  od = OneDriveAccess(tenant_id, client_id, client_secret)",
        "  # ... use od.list_drive_items(), od.download_file(), etc.",
    )
    for usage_line in usage_lines:
        print(usage_line)