In [1]:
from data_ingestion import upload_file, search_file, generate_metadata_template, download_file
from rich.console import Console
import os
import uuid
import pandas as pd
import yaml



In [2]:
TEST_DIR = "notebook_tests"
os.makedirs(TEST_DIR, exist_ok=True)
print(f"Test directory '{TEST_DIR}' created.")

# --- Create Data Files ---
with open(os.path.join(TEST_DIR, "report_alpha_v1.pdf"), "w") as f:
    f.write("This is the first report for Project Alpha.")

with open(os.path.join(TEST_DIR, "raw data !! special.mat"), "w") as f:
    f.write("Matlab data with special characters in name.")

with open(os.path.join(TEST_DIR, "beta_image.jpg"), "w") as f:
    f.write("Fake JPG content.")
    
# --- Create Metadata YAML Files ---
metadata_alpha_v1 = {
    "research_project_id": "ProjectAlpha",
    "author": "Dr. Reed",
    "experiment_type": "Initial Analysis",
    "date_conducted": "2025-05-15",
    "custom_tags": "draft, analysis, report"
}
with open(os.path.join(TEST_DIR, "report_alpha_v1.yaml"), "w") as f:
    yaml.dump(metadata_alpha_v1, f)

metadata_special_chars = {
    "research_project_id": "ProjectAlpha / SubGroup B!",
    "author": "Dr. Müller",
    "experiment_type": "Sanitization Test",
    "date_conducted": "2025-05-20",
    "custom_tags": "edge_case, sanitization"
}
with open(os.path.join(TEST_DIR, "raw data !! special.yaml"), "w") as f:
    yaml.dump(metadata_special_chars, f)
    
metadata_beta = {
    "research_project_id": "ProjectBeta",
    "author": "Tech_Smith",
    "date_conducted": "2025-06-01"
    # Note: experiment_type and custom_tags are intentionally omitted
}
with open(os.path.join(TEST_DIR, "beta_image.yaml"), "w") as f:
    yaml.dump(metadata_beta, f)

print("All test data and metadata files have been created.")

Test directory 'notebook_tests' created.
All test data and metadata files have been created.


In [3]:
console = Console()
try:
    print("--- Running Test 1: Happy Path Upload ---")
    data_path = os.path.join(TEST_DIR, "report_alpha_v1.pdf")
    meta_path = os.path.join(TEST_DIR, "report_alpha_v1.yaml")
    
    upload_result = upload_file(data_file_path=data_path, metadata_file_path=meta_path)
    
    console.print("✅ PASSED: Upload successful. API Response:", style="bold green")
    console.print(upload_result)
    
    # Store the file_id for later tests
    happy_path_file_id = upload_result.get("db_file_id")
    
except Exception as e:
    console.print(f"❌ FAILED: {e}", style="bold red")

INFO: Starting ingestion for data file: 'notebook_tests/report_alpha_v1.pdf'
INFO: Calling API at http://localhost:8001/uploadfile/...
INFO: --- Ingestion Successful ---


--- Running Test 1: Happy Path Upload ---


In [4]:
try:
    print("\n--- Running Test 5: Download ---")
    if True:
        # Get the ID of the first file found in the previous search
        file_to_download_id = '30e98988-0086-4c99-9e89-df0e5072afb5'
        original_name = 'report_alpha_v1.pdf'
        
        # Define a destination for the download
        download_destination_folder = os.path.join(TEST_DIR, "downloads")
        os.makedirs(download_destination_folder, exist_ok=True)
        
        print(f"Attempting to download file with ID: {file_to_download_id}")
        
        downloaded_path = download_file(
            file_id=file_to_download_id,
            destination_path=download_destination_folder
        )
        
        assert os.path.exists(downloaded_path) and os.path.getsize(downloaded_path) > 0
        console.print(f"✅ PASSED: File successfully downloaded to: {downloaded_path}", style="bold green")
    else:
        console.print("[yellow]Skipping download test because no files were found in the previous search.[/yellow]")

except Exception as e:
    console.print(f"❌ FAILED: {e}", style="white")

INFO: Downloading file to: notebook_tests/downloads/report_alpha_v1.pdf
INFO: File downloaded successfully!



--- Running Test 5: Download ---
Attempting to download file with ID: 30e98988-0086-4c99-9e89-df0e5072afb5


In [8]:
try:
    print("\n--- Running Test 6: Error Handling ---")
    print("Testing download with a fake UUID...")
    fake_id = str(uuid.uuid4())
    download_file(file_id=fake_id, destination_path=".")
    # If we get here, the test failed because it should have raised an error
    console.print("❌ FAILED: Download function did not raise an error for a fake UUID.", style="bold red")
except Exception as e:
    # We expect an error here. If the error message contains 404, it's correct.
    if "404" in str(e) and "not found" in str(e).lower():
        console.print("✅ PASSED: Correctly caught expected error for fake UUID.", style="bold green")
        console.print(f"   Error message: {e}")
    else:
        console.print("❌ FAILED: Caught an error, but it wasn't the expected 404 Not Found.", style="bold red")
        console.print(f"   Error was: {e}")

ERROR: API returned an error (Status 404): 



--- Running Test 6: Error Handling ---
Testing download with a fake UUID...


In [16]:
console = Console()
def run_test(test_name, **kwargs):
    """A helper function to run a search test and print results."""
    print("\n" + "="*50)
    console.print(f"[bold cyan]Running Test: {test_name}[/bold cyan]")
    console.print(f"Search Criteria: {kwargs}")
    
    try:
        results_df = search_file(**kwargs)
        
        if results_df.empty:
            console.print("[yellow]Search returned no results.[/yellow]")
        else:
            console.print("Search Results:")
            console.print(results_df[['file_id', 'file_name', 'author', 'research_project_id', 'date_conducted', 'minio_object_path']])
    except Exception as e:
        console.print(f"[white]An error occurred during the test: {e}[/white]")
    
run_test(
    "Find files by Dr. Reed in ProjectAlpha",
    author="Reed",
    research_project_id="ProjectAlpha"
)







INFO: Querying API at http://localhost:8001/search/ with parameters: {'research_project_id': 'ProjectAlpha', 'author': 'Reed'}


In [17]:
generate_metadata_template("beta_photo.yaml", overwrite=True)



INFO: Template YAML created at: beta_photo.yaml


In [None]:
upload_file("beta_photo.jpg")

TypeError: upload_file() missing 1 required positional argument: 'metadata_file_path'

In [12]:
search_file(research_project_id="photo_test", file_type = "JPG")

INFO: Querying API at http://localhost:8001/search/ with parameters: {'research_project_id': 'photo_test', 'file_type': 'JPG'}


Unnamed: 0,file_id,research_project_id,file_name,file_type,content_type,experiment_type,author,date_conducted,size_bytes,minio_bucket_name,minio_object_path,upload_timestamp,custom_tags
0,f1c04ab4-f7d8-493f-96b2-ba33b0bf31dc,photo_test,beta_photo.jpg,JPG,application/octet-stream,,willy,NaT,0,raw-data,photo_test/beta_photo.jpg,2025-06-11 15:01:06.392000+00:00,


In [32]:

data_path = ('/Users/wmorrill24/Desktop/022705_p41_frequency_sweep/data/05_27_2025_Data_Calibration_2.5/0527_1653_xz_scan.mat')
upload_file(data_path, "0527_1653_xz_scan.yaml")

INFO: Starting ingestion for data file: '/Users/wmorrill24/Desktop/022705_p41_frequency_sweep/data/05_27_2025_Data_Calibration_2.5/0527_1653_xz_scan.mat'
INFO: Calling API at http://localhost:8001/uploadfile/...
INFO: --- Ingestion Successful ---


{'message': "File '0527_1653_xz_scan.mat' processed. Metadata stored successfully.",
 'db_file_id': '93a737f8-8d74-41c2-ba66-07d1c8fe3b9d',
 'minio_bucket': 'raw-data',
 'minio_object_name': 'Data_Calibration/0527_1653_xz_scan.mat',
 'original_filename': '0527_1653_xz_scan.mat',
 'etag_minio': '772fd627ae946ee7ff9c33501e7fe47e-7',
 'metadata_db_status': 'success',
 'metadata_db_message': 'Metadata stored successfully.',
 'db_details': {'original_file_name': '0527_1653_xz_scan.mat',
  'minio_object_path': 'Data_Calibration/0527_1653_xz_scan.mat',
  'research_project_id': 'Data Calibration'}}

In [33]:
search_file()

INFO: Querying API at http://localhost:8001/search/ with parameters: {}


Unnamed: 0,file_id,research_project_id,file_name,file_type,content_type,experiment_type,author,date_conducted,size_bytes,minio_bucket_name,minio_object_path,upload_timestamp,custom_tags
0,93a737f8-8d74-41c2-ba66-07d1c8fe3b9d,Data Calibration,0527_1653_xz_scan.mat,MAT,application/octet-stream,,wkm,NaT,66655002,raw-data,Data_Calibration/0527_1653_xz_scan.mat,2025-06-11 15:40:53.766000+00:00,
1,f1c04ab4-f7d8-493f-96b2-ba33b0bf31dc,photo_test,beta_photo.jpg,JPG,application/octet-stream,,willy,NaT,0,raw-data,photo_test/beta_photo.jpg,2025-06-11 15:01:06.392000+00:00,
2,c19d4aa9-643a-4cfb-9e5c-1d8b9f9096c9,will,my_cool_experiment.mat,MAT,application/octet-stream,,will,NaT,20,raw-data,will/my_cool_experiment(13).mat,2025-06-11 14:51:13.325000+00:00,
3,3c912342-6924-4a1a-97c5-46d68b228c61,will,my_cool_experiment.mat,MAT,application/octet-stream,,will,NaT,20,raw-data,will/my_cool_experiment(12).mat,2025-06-11 14:48:41.351000+00:00,
4,7f5dbd43-9d70-4e8c-b3b3-edf70a242fb3,will,my_cool_experiment.mat,MAT,application/octet-stream,,will,NaT,20,raw-data,will/my_cool_experiment(11).mat,2025-06-11 14:21:47.183000+00:00,
5,a9ba576e-35fc-4c8b-98db-fecb7b258c0c,will/,my_cool_experiment.mat,MAT,application/octet-stream,,will,NaT,20,raw-data,will/my_cool_experiment(10).mat,2025-06-11 14:15:49.004000+00:00,
6,f919e79a-4e00-44e1-82e2-a0a3bcae8757,client_test/,my_cool_experiment.mat,MAT,application/octet-stream,testing_is_fun,will,2025-06-10,20,raw-data,client_test/my_cool_experiment.mat,2025-06-10 19:30:41.008000+00:00,lebron is the goat
7,f4843a16-365f-4582-b5c9-5c4cdb5cc350,etc/,data_for_empty_yaml.txt,TXT,text/plain,,Security Tester,NaT,25,raw-data,etc/data_for_empty_yaml(1).txt,2025-06-10 17:51:43.649000+00:00,
8,72ab61e6-79a3-4982-b3e6-b14df2b91bb3,../../../etc,data_for_empty_yaml.txt,TXT,text/plain,,Security Tester,NaT,25,raw-data,etc/data_for_empty_yaml.txt,2025-06-10 17:48:52.443000+00:00,
9,b8080cc6-2a3d-4223-af9c-918628cc8000,ProjetÉtudePhénix,data_for_empty_yaml.txt,TXT,text/plain,,Dr. Müller,NaT,25,raw-data,ProjetÉtudePhénix/data_for_empty_yaml.txt,2025-06-10 17:45:45.499000+00:00,"résumé, données"


In [5]:
download_file()

TypeError: download_file() missing 1 required positional argument: 'file_id'