# Problem Statement
We need to list all series in the MOST database that are potential dose reports.

# Solution
1. Filter series with <5 images
2. Filter by study descriptions
3. Manually review remaining
4. Add data cleaning table to the database

# Code
### 1. Setup: get MOST's database

In [6]:
import AMBRA_Backups
import pandas as pd

In [2]:
# Name of the database this notebook works against.
db_name = "MOST"

# Database object from AMBRA_Backups; the queries below are run through it.
db = AMBRA_Backups.database.Database(db_name)

### 2. Get dose reports

Get all series with fewer than 5 DICOM instances (`number_of_dicoms < 5`).

In [8]:
# Column labels for the series DataFrame. The order must match the SELECT
# list of the series query, because the labels are applied positionally.
columns = [
    # selected from img_series
    "id_series",  # alias of img_series.id
    "number_of_dicoms",
    "series_description",
    "id_study",
    # selected from studies
    "study_description",
    "study_uid",
    # selected from patients
    "patient_name",
]

In [9]:
# Fetch every series with fewer than 5 DICOMs, joined to its parent study and
# patient so the descriptions and identifiers are available for filtering
# and review.
series = db.run_select_query(
   """
   SELECT img_series.id AS 'id_series', 
      img_series.number_of_dicoms, 
      img_series.series_description,
      img_series.id_study, 
      studies.study_description, 
      studies.study_uid, 
      patients.patient_name 
   FROM MOST.img_series 
   INNER JOIN studies ON studies.id = img_series.id_study
   INNER JOIN patients ON studies.id_patient = patients.id
   WHERE number_of_dicoms < 5
   """
)

# Label the result rows with `columns`, which lists the SELECT fields in order.
series_df = pd.DataFrame(data=series, columns=columns)
series_df

Unnamed: 0,id_series,number_of_dicoms,series_description,id_study,study_description,study_uid,patient_name
0,13,1,Dose_Record,2,CT_HEAD_WO_FOLLOW_UP,2.16.840.1.114444.637556529256860325.1059919336,1186
1,22,1,Dose_Record,3,CT_CTA_HEAD_NECK_W_WO_BASELINE,2.16.840.1.114444.637556527697070963.1636620793,1186
2,34,1,Viz_LVO_Results,3,CT_CTA_HEAD_NECK_W_WO_BASELINE,2.16.840.1.114444.637556527697070963.1636620793,1186
3,42,1,Dose_Record,4,CT_HEAD_WO,2.16.840.1.114444.637556529313565633.1365080542,1186
4,50,3,AAHead_Scout_MPR_cor,6,MRI_BRAIN_WO_UNSCHEDULED_1,2.16.840.1.114444.637472723958126760.2599485960,1128
...,...,...,...,...,...,...,...
9579,315512,1,DummySeriesDesc_13,4823,ANGIO_EVT_IMAGES,1.2.840.114356.2024.16.112.113.147.19.1480.1,1046
9580,315513,1,DummySeriesDesc_14,4823,ANGIO_EVT_IMAGES,1.2.840.114356.2024.16.112.113.147.19.1480.1,1046
9581,315515,1,DummySeriesDesc_16,4823,ANGIO_EVT_IMAGES,1.2.840.114356.2024.16.112.113.147.19.1480.1,1046
9582,315516,1,DummySeriesDesc_17,4823,ANGIO_EVT_IMAGES,1.2.840.114356.2024.16.112.113.147.19.1480.1,1046


Keep only the series whose description contains 'dose' (case-insensitive); this captures variations such as 'Dose_Record' and 'Dose_Report'.

In [10]:
# Keep only the series whose description mentions "dose" (case-insensitive).
# na=False makes a missing series_description count as "no match". Without it,
# str.contains yields NaN for those rows and indexing with the resulting mask
# can raise "ValueError: Cannot mask with non-boolean array containing NA / NaN values".
is_dose_series = series_df['series_description'].str.contains('dose', case=False, na=False)
series_dose_filtered = series_df[is_dose_series]
series_dose_filtered

Unnamed: 0,id_series,number_of_dicoms,series_description,id_study,study_description,study_uid,patient_name
0,13,1,Dose_Record,2,CT_HEAD_WO_FOLLOW_UP,2.16.840.1.114444.637556529256860325.1059919336,1186
1,22,1,Dose_Record,3,CT_CTA_HEAD_NECK_W_WO_BASELINE,2.16.840.1.114444.637556527697070963.1636620793,1186
3,42,1,Dose_Record,4,CT_HEAD_WO,2.16.840.1.114444.637556529313565633.1365080542,1186
76,518,1,Dose_Report,41,CT_HEAD_WO_FOLLOW_UP,1.2.840.114350.2.430.2.798268.2.435562870.1,1200
78,566,1,Dose_Report,47,CTA_HEAD_NECK_W_BASELINE,1.2.840.113619.2.182.1080861873190.1620672412....,1196
...,...,...,...,...,...,...,...
9144,297049,1,Dose_Record,4781,CT_HEAD_WO_FOLLOW_UP,1.2.840.113619.2.340.3.3356099742.545.16736785...,1491
9147,297069,1,Dose_Record,4783,CTP_HEAD_W_BASELINE,2.16.840.1.113786.1.52.17.945033817.3545,1491
9149,297077,1,Dose_Record,4784,CT_HEAD_WO_BASELINE,2.16.840.1.113786.1.52.17.945033815.3543,1491
9151,297087,1,Dose_Record,4785,CTA_HEAD_NECK_W_BASELINE,2.16.840.1.113786.1.52.17.945033816.3544,1491
