# MH Service Contacts

In [1]:
import pandas as pd
import csv
import os
import time

## 1. Script Setup

## 2. Process SAS bdat file data

In [2]:
# File paths
path_sas_files = f"C:\\temp\\sas_files"
path_csv_files = f"C:\\temp\\sas_files"

# list of SAS files
lst_sas_files = ["servicecontacts_2425.sas7bdat","servicecontacts_2324.sas7bdat","servicecontacts_2223.sas7bdat","servicecontacts_2122.sas7bdat","servicecontacts_2021.sas7bdat"]
lst_csv_files = ["Service Contacts - currentyear.csv","Service Contacts - year2.csv","Service Contacts - year3.csv","Service Contacts - year4.csv","Service Contacts - year5.csv"]

# Full file paths
lst_full_sas_files = [os.path.join(path_sas_files, file) for file in lst_sas_files]
lst_full_csv_files = [os.path.join(path_csv_files, file) for file in lst_csv_files]



In [3]:
sas_file = lst_full_sas_files[0]
csv_file = lst_full_csv_files[0]

# Print out the current file being processed
print(f"Processing {sas_file} to {csv_file}")

Processing C:\temp\sas_files\servicecontacts_2425.sas7bdat to C:\temp\sas_files\Service Contacts - currentyear.csv


In [4]:
# Read the SAS file into a pandas DataFrame
df = pd.read_sas(sas_file, encoding='latin-1')

In [5]:
# 1. Convert SAS date column to datetime

# If activation_date is not of datetime64 type then convert activation_date from SAS integer to datetime
if not pd.api.types.is_datetime64_any_dtype( df['activation_date'] ):
    # Convert SAS date to datetime
    # SAS dates are the number of days since January 1, 1960
    # The origin is set to '1960-01-01' and unit is 'D' for days
    df['activation_date'] = pd.to_datetime(df['activation_date'], unit='D', origin='1960-01-01')

In [6]:
# 2. Change date format

# Change date format to 'dd/mm/yy'
df['contdate']          = df['contdate'].dt.strftime('%d/%m/%y')
df['activation_date']   = df['activation_date'].dt.strftime('%d/%m/%y')
df['deactivation_date'] = df['deactivation_date'].dt.strftime('%d/%m/%y')

In [7]:
# Convert SAS time to HH:MM:SS format

df['sc_start_time'] = df['sc_start_time'].astype('Int64')
df['sc_start_time'] = pd.to_datetime(df["sc_start_time"], unit='s').dt.strftime('%H:%M:%S')

In [8]:
# 4. Change code columns to integers

# Change the data type "code" columns from float to integer
df['pt_employment_status_code'] = df['pt_employment_status_code'].astype('Int64')
df['pt_ethnicity_code']         = df['pt_ethnicity_code'].astype('Int64')
df['sc_legal_status']           = df['sc_legal_status'].astype('Int64')
df['pt_marital_status_code']    = df['pt_marital_status_code'].astype('Int64')
df['sc_client_present']         = df['sc_client_present'].astype('Int64')
df['sc_associate_present']      = df['sc_associate_present'].astype('Int64')
df['org_code']                  = df['org_code'].astype('Int64')
df['pt_residential_postcode']   = df['pt_residential_postcode'].astype('Int64')
df['SA2_MAINCODE']              = df['SA2_MAINCODE'].astype('Int64')
df['program_code']              = df['program_code'].astype('Int64')
df['district_code']             = df['district_code'].astype('Int64')
df['pt_sex_code']               = df['pt_sex_code'].astype('Int64')
df['stream_code']               = df['stream_code'].astype('Int64')
df['referral_id']               = df['referral_id'].astype('Int64')

## 3. Checks

In [9]:
df.dtypes

pt_age_on_contact                float64
contdate                          object
sc_start_time                     object
pt_country_of_birth               object
pt_date_of_birth_mmyy             object
sc_duration                      float64
pt_enc_id                         object
pt_employment_status_code          Int64
pt_employment_status              object
sc_session_type                   object
pt_residential_health_service     object
HSP                               object
pt_ethnicity_code                  Int64
pt_ethnicity                      object
sc_legal_status                    Int64
pt_marital_status_code             Int64
pt_marital_status                 object
sc_medium                         object
sc_client_present                  Int64
sc_associate_present               Int64
se_category                       object
org_code                           Int64
org                               object
pt_residential_postcode            Int64
pt_residential_s

In [10]:
df.head

<bound method NDFrame.head of          pt_age_on_contact  contdate sc_start_time pt_country_of_birth  \
0                    132.0  01/05/25      01:10:00       Not Specified   
1                    132.0  19/12/24      09:00:00       Not Specified   
2                    132.0  30/03/25      21:35:00       Not Specified   
3                    132.0  25/04/25      21:24:00       Not Specified   
4                    132.0  26/03/25      05:08:00       Not Specified   
...                    ...       ...           ...                 ...   
1157566               19.0  16/01/25      14:00:00   Western Australia   
1157567               19.0  17/01/25      09:30:00   Western Australia   
1157568               19.0  17/01/25      14:14:00   Western Australia   
1157569               19.0  28/01/25      09:08:00   Western Australia   
1157570               19.0  25/02/25      14:30:00   Western Australia   

        pt_date_of_birth_mmyy  sc_duration      pt_enc_id  \
0                   