# MH Service Contacts

In [63]:
import pandas as pd
import csv
import os
import time

## 1. Script Setup

## 2. Process SAS bdat file data

In [64]:
# File paths
path_sas_files = f"C:\\temp\\sas_files"
path_csv_files = f"C:\\temp\\sas_files"

# list of SAS files
lst_sas_files = ["servicecontacts_2425.sas7bdat","servicecontacts_2324.sas7bdat","servicecontacts_2223.sas7bdat","servicecontacts_2122.sas7bdat","servicecontacts_2021.sas7bdat"]
lst_csv_files = ["Service Contacts - currentyear.csv","Service Contacts - year2.csv","Service Contacts - year3.csv","Service Contacts - year4.csv","Service Contacts - year5.csv"]

# Full file paths
lst_full_sas_files = [os.path.join(path_sas_files, file) for file in lst_sas_files]
lst_full_csv_files = [os.path.join(path_csv_files, file) for file in lst_csv_files]



In [69]:
sas_file = lst_full_sas_files[0]
csv_file = lst_full_csv_files[0]

# Print out the current file being processed
print(f"Processing {sas_file} to {csv_file}")

Processing C:\temp\sas_files\servicecontacts_2425.sas7bdat to C:\temp\sas_files\Service Contacts - currentyear.csv


In [70]:
# Read the SAS file into a pandas DataFrame
df = pd.read_sas(sas_file, encoding='latin-1')

In [71]:
# 1. Convert SAS date column to datetime

# If activation_date is not of datetime64 type then convert activation_date from SAS integer to datetime
if not pd.api.types.is_datetime64_any_dtype( df['activation_date'] ):
    # Convert SAS date to datetime
    # SAS dates are the number of days since January 1, 1960
    # The origin is set to '1960-01-01' and unit is 'D' for days
    df['activation_date'] = pd.to_datetime(df['activation_date'], unit='D', origin='1960-01-01')

In [72]:
# 2. Change date format

# Change date format to 'dd/mm/yy'
df['contdate']          = df['contdate'].dt.strftime('%d/%m/%y')
df['activation_date']   = df['activation_date'].dt.strftime('%d/%m/%y')
df['deactivation_date'] = df['deactivation_date'].dt.strftime('%d/%m/%y')

In [73]:
# Convert SAS time to HH:MM:SS format

df['sc_start_time'] = df['sc_start_time'].astype('Int64')
df['sc_start_time'] = pd.to_datetime(df["sc_start_time"], unit='s').dt.strftime('%H:%M:%S')

In [74]:
# 4. Change code columns to integers

# Change the data type "code" columns from float to integer
df['pt_employment_status_code'] = df['pt_employment_status_code'].astype('Int64')
df['pt_ethnicity_code']         = df['pt_ethnicity_code'].astype('Int64')
df['sc_legal_status']           = df['sc_legal_status'].astype('Int64')
df['pt_marital_status_code']    = df['pt_marital_status_code'].astype('Int64')
df['sc_client_present']         = df['sc_client_present'].astype('Int64')
df['sc_associate_present']      = df['sc_associate_present'].astype('Int64')
df['org_code']                  = df['org_code'].astype('Int64')
df['pt_residential_postcode']   = df['pt_residential_postcode'].astype('Int64')
df['SA2_MAINCODE']              = df['SA2_MAINCODE'].astype('Int64')
df['program_code']              = df['program_code'].astype('Int64')
df['district_code']             = df['district_code'].astype('Int64')
df['pt_sex_code']               = df['pt_sex_code'].astype('Int64')
df['stream_code']               = df['stream_code'].astype('Int64')
df['referral_id']               = df['referral_id'].astype('Int64')

## 3. Checks

In [75]:
df.dtypes

pt_age_on_contact                float64
contdate                          object
sc_start_time                     object
pt_country_of_birth               object
pt_date_of_birth_mmyy             object
sc_duration                      float64
pt_enc_id                         object
pt_employment_status_code          Int64
pt_employment_status              object
sc_session_type                   object
pt_residential_health_service     object
HSP                               object
pt_ethnicity_code                  Int64
pt_ethnicity                      object
sc_legal_status                    Int64
pt_marital_status_code             Int64
pt_marital_status                 object
sc_medium                         object
sc_client_present                  Int64
sc_associate_present               Int64
se_category                       object
org_code                           Int64
org                               object
pt_residential_postcode            Int64
pt_residential_s

In [68]:
df.head

<bound method NDFrame.head of          pt_age_on_contact   contdate  sc_start_time pt_country_of_birth  \
0                     37.0 2023-01-30        45600.0       Not Specified   
1                     35.0 2022-12-15        29940.0       Not Specified   
2                     23.0 2023-01-20        72000.0       Not Specified   
3                     30.0 2023-02-24        29400.0       Not Specified   
4                     36.0 2023-06-29        55980.0       Not Specified   
...                    ...        ...            ...                 ...   
1023939               16.0 2022-07-14        51300.0   Western Australia   
1023940               16.0 2022-07-18        49200.0   Western Australia   
1023941               16.0 2022-07-19        34200.0   Western Australia   
1023942               16.0 2022-08-31        37200.0   Western Australia   
1023943               17.0 2023-04-11        43800.0   Western Australia   

        pt_date_of_birth_mmyy  sc_duration      pt_enc_id