# Problem Statement
We need to list all series in the MOST database that are potential dose reports.

# Solution
1. Filter series with <5 images
2. Filter by series descriptions (those containing "dose")
3. Manually review remaining
4. Add data cleaning table to the database

# Code
### 1. Setup: get MOST's database and Inteleshare

In [None]:
import AMBRA_Backups
import AMBRA_Utils
import pandas as pd
from datetime import datetime

In [None]:
# Create the AMBRA_Backups Database object for the schema named in db_name.
# NOTE(review): presumably this opens a connection to the local backup
# database using credentials configured outside this notebook — confirm.
db_name = "MOST"
db = AMBRA_Backups.database.Database(db_name)

In [None]:
# Obtain an API handle from AMBRA_Utils and look up the account by its name.
# NOTE(review): presumably get_api() reads credentials from local
# configuration — confirm.
ambra_account_name = "MOST"
ambra = AMBRA_Utils.utilities.get_api()
account = ambra.get_account_by_name(ambra_account_name)

### 2. Get dose reports from Database

Get all series that belong to non-deleted studies (the <5 instances filter is applied in the next step)

In [None]:
# Labels for the rows returned by the series query, in SELECT-list order.
columns = (
    # Fields read from img_series.
    ["id_series", "number_of_dicoms", "series_description", "id_study"]
    # Fields read from studies.
    + [
        "study_description",
        "study_uuid",
        "series_count",
        "id_patient",
        "study_deleted",
    ]
    # Fields read from patients.
    + ["patient_name"]
)

In [None]:
# Fetch every series that belongs to a non-deleted study, joined with its
# parent study and patient, and snapshot the result to an Excel file.
#
# Every selected field is aliased to the label used in `columns`, so the
# DataFrame is built correctly whether the query helper returns positional
# tuples or rows keyed by column name (previously `studies.uuid` had no alias
# and only lined up with "study_uuid" by position).
# `deleted` is qualified with its table so the WHERE clause stays unambiguous
# across the three joined tables.
series = db.run_select_query(
    """
    SELECT
        img_series.id AS 'id_series',
        img_series.number_of_dicoms,
        img_series.series_description,
        img_series.id_study,
        studies.study_description,
        studies.uuid AS 'study_uuid',
        studies.series_count,
        studies.id_patient,
        studies.deleted AS 'study_deleted',
        patients.patient_name
    FROM MOST.img_series
    INNER JOIN studies ON studies.id = img_series.id_study
    INNER JOIN patients ON studies.id_patient = patients.id
    WHERE
        studies.deleted = 0
    """
)
series_df = pd.DataFrame(columns=columns, data=series)
# str(datetime.now()) contains spaces and colons, which are not valid in
# Windows file names; use a compact, filesystem-safe timestamp instead.
series_df.to_excel(f"all_series_{datetime.now():%Y%m%d_%H%M%S}.xlsx")

Filter series whose description contains some variation of 'Dose_Record/Report' and that have fewer than 5 DICOM instances

In [None]:
# Keep series whose description mentions "dose" (case-insensitive) and that
# contain fewer than 5 DICOMs.
# na=False makes rows with a missing description an explicit non-match.
# .copy() detaches the result from series_df: the frame is mutated in later
# cells, and writing to a boolean-mask slice triggers SettingWithCopyWarning.
series_dose_filtered = series_df[
    (series_df["series_description"].str.contains("dose", case=False, na=False))
    & (series_df["number_of_dicoms"] < 5)
].copy()
# str(datetime.now()) contains spaces and colons, which are not valid in
# Windows file names; use a compact, filesystem-safe timestamp instead.
series_dose_filtered.to_excel(f"dose_filtered_{datetime.now():%Y%m%d_%H%M%S}.xlsx")
# Bare expression so the notebook displays the filtered frame.
series_dose_filtered

### 3. Get dose reports that still exist on Inteleshare

03/20/2025: In the database, there is currently no way to query the series that have already been deleted from studies. As such, this section fetches each study from Inteleshare and marks which of its dose series still exist there.

In [None]:
# Add the bookkeeping columns that the Inteleshare lookup fills in:
#   exist_on_inteleshare - 0 until a matching live series is found
#   series_count_api     - starts as the database's series_count
#   studies_image_count  - 0 until the study's image count is read
# assign() returns a new DataFrame, so this does not write into a slice of
# series_df (which is what raised SettingWithCopyWarning with item assignment).
series_dose_filtered = series_dose_filtered.assign(
    exist_on_inteleshare=0,
    series_count_api=series_dose_filtered["series_count"],
    studies_image_count=0,
)

In [None]:
# For every study that has a candidate dose series, look the study up on
# Inteleshare, record its image count, and flag which candidate series are
# still present there.
unique_studies = series_dose_filtered["study_uuid"].unique()

for uuid in unique_studies:
    study = account.get_study_by_uuid(uuid)

    if not study:
        raise Exception(f"Study with uid {uuid} not found on Inteleshare!")
    # Check the type before calling len(): the original evaluated len(study)
    # first, which fails for any result object that does not define __len__.
    if isinstance(study, list):
        if len(study) > 1:
            raise ValueError(
                f"There are {len(study)} studies found with {uuid} on Inteleshare!"
            )
        # A one-element list is unwrapped so the attribute access below works.
        study = study[0]

    # Rows of the candidate table that belong to this study.
    study_rows = series_dose_filtered["study_uuid"] == uuid

    # Get image count
    series_dose_filtered.loc[study_rows, "studies_image_count"] = study.image_count

    # Get live series from Inteleshare
    try:
        live_series = list(study.get_series())
    except Exception:
        print(f"\t Study with uuid {uuid} likely does not exist on Inteleshare")
        # Skip this study. Falling through would use an undefined
        # `live_series` on the first iteration, or the previous study's
        # series on later ones, and flag the wrong rows.
        continue

    # Distinct loop variable: the original reused `series`, overwriting the
    # query result of the same name.
    for live in live_series:
        live_description = live.formatted_description
        if "dose" not in live_description.lower():
            continue

        # Candidate rows of this study whose description equals the live one.
        matching_rows = study_rows & (
            series_dose_filtered["series_description"] == live_description
        )
        series_dose_filtered.loc[matching_rows, "exist_on_inteleshare"] = 1
        # Number of series the API currently reports for the study.
        series_dose_filtered.loc[matching_rows, "series_count_api"] = len(live_series)

# NOTE(review): the original called drop_duplicates(["study_uuid"]) here
# without keeping the result, so nothing was ever dropped. The call is omitted
# to keep the export unchanged (one row per candidate series); assign the
# result back if one row per study is actually wanted.

# str(datetime.now()) contains spaces and colons, which are not valid in
# Windows file names; use a compact, filesystem-safe timestamp instead.
series_dose_filtered.to_excel(
    f"dose_filtered_existing_{datetime.now():%Y%m%d_%H%M%S}.xlsx"
)

In [None]:
# Deliberate stop: raising here aborts execution at this cell.
# NOTE(review): presumably a guard so that "Run All" does not continue into
# the test cells below — confirm.
raise Exception("stop")

# Tests

In [None]:
# Manual check: fetch one study by uuid, download it as a zip archive and
# print the study together with its series.
ambra_account_name = "MOST"
ambra = AMBRA_Utils.utilities.get_api()
account = ambra.get_account_by_name(ambra_account_name)

# Study uuids that have a dose report in the original dataframe
# (trailing number as noted alongside each uuid):
#   ad7dd78a-4c20-4060-8854-a3bf98b3a895   1186
#   8d39235d-37b3-4ea8-9355-0508d4f991fd   1200
#   c18e950b-5867-472a-aed9-ed6a2dc94e4d   1195
#   37c12364-5d28-4478-aa2d-163e13bc54f4   1121, not found on inteleshare?
#   0d6e0008-50a2-4562-989d-e3fd282c71ad   1146
#   9377d953-9acc-4fef-863e-00ad5d4aa120   1121

# The uuid under test.
uuid = "8d39235d-37b3-4ea8-9355-0508d4f991fd"

study = account.get_study_by_uuid(uuid)

if study:
    # Save the study as "<patient name>_<study description>.zip" under ./Files.
    study.download(
        zip_file=f"./Files/{study.patient_name}_{study.formatted_description}.zip",
        ignore_exists=True,
    )

    series = list(study.get_series())
    print(f"""
    study:  {study}
    uuid:       {uuid}
    series:     {series}
    """)
else:
    print(f"uuid {uuid} not found")

In [None]:
# Bare expression: as the last expression in a notebook cell its value is
# displayed. Uses the `study` object fetched in the preceding cell.
# NOTE(review): what `schema` contains is not visible from this notebook.
study.schema