##### Extract PDF from Forum DICOM
----------------------------
v.1.0.0

## Imports

In [1]:
import os
import os.path
from os import listdir
from datetime import date

import pydicom

## Input / Output Path

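Set the path to the directory that contains the Forum .dcm files (each holding an encapsulated PDF). The extracted PDFs are written to sub-directories created inside this same directory.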

In [2]:

# Directory that contains the input .dcm files. The extracted PDFs are written
# to sub-directories created beneath this same directory.
# NOTE(review): this is a machine-specific absolute path; change it before
# running the notebook on another machine.
input_dir = '/Users/nicolebrye/Desktop/HGC/Projects/vf-dicom'


In [3]:
# Names of the DICOM files found directly inside input_dir.
# - sorted(): listdir() returns entries in arbitrary order, so sort to make the
#   processing order reproducible between runs and machines.
# - lower(): match the extension case-insensitively so '.DCM' files are not
#   silently skipped.
files = sorted(
    file for file in listdir(input_dir)
    if file.lower().endswith('.dcm')
)


## Extract PDF and write to output

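Each PDF is written to a `<today's date>_<test type>` sub-directory of the input directory and is named `<patient ID>_<eye>_<visit date>_<test type>_<visit time>.pdf`. Documents whose title starts with `OVERVIEW` are skipped.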

In [4]:

# DICOM tag (group, element) whose value is read to determine the test type
x = 0x7717
y = 0x1001

# Today's date as YYYYMMDD; used as the prefix of every output directory name
today = date.today().strftime('%Y%m%d')

for f in files:

    # File to read
    input_path = os.path.join(input_dir, f)

    # Read DICOM. dcmread() is the supported reader; the older read_file()
    # alias is deprecated and no longer exists in pydicom 3.0.
    dcm = pydicom.dcmread(input_path)

    # Skip overview documents before copying the (potentially large) payload
    title = dcm.DocumentTitle
    if title.startswith('OVERVIEW'):
        continue

    pdf_data = bytearray(dcm.EncapsulatedDocument)

    # Determine the type of test: keep only the digits of the tag value and
    # put a dash after the second digit (e.g. digits '242' -> '24-2')
    digits = ''.join(char for char in dcm[x, y].value if char.isdigit())
    test_type = digits[:2] + '-' + digits[2:]

    # Patient ID: prefer the ID recorded in the second OriginalAttributesSequence
    # item; fall back to the top-level PatientID when that sequence, item or
    # attribute is absent. Only the "missing data" errors are caught so that
    # Ctrl-C and unrelated failures are no longer swallowed.
    try:
        patient_id = dcm.OriginalAttributesSequence[1].ModifiedAttributesSequence[0].PatientID
    except (AttributeError, IndexError):
        patient_id = dcm.PatientID

    # Eye: 'L' maps to OS; every other laterality value is labelled OD
    if dcm.ImageLaterality == 'L':
        eye = 'OS'
    else:
        eye = 'OD'

    # Visit date and time; characters 8-11 of AcquisitionDateTime are HHMM
    visit_date = dcm.StudyDate
    visit_time = dcm.AcquisitionDateTime[8:12]

    # Output directory: <input_dir>/<today>_<test type>, created on demand
    output_dir = os.path.join(input_dir, today + '_' + test_type)
    os.makedirs(output_dir, exist_ok=True)

    # Output file name: <patient ID>_<eye>_<visit date>_<test type>_<visit time>.pdf
    separate = '_'
    pdf_name = separate.join([patient_id, eye, visit_date, test_type, visit_time]) + '.pdf'
    pdf_path = os.path.join(output_dir, pdf_name)

    # Write data to output, overwriting the file if it exists. The context
    # manager flushes and closes the handle even if the write fails.
    with open(pdf_path, 'wb') as pdf_file:
        pdf_file.write(pdf_data)
