# MH Service Contacts

In [10]:
import pandas as pd
import csv
import os

## 1. Script Setup

### 1.1 Functions

In [11]:
def load_sas_to_dataframe(sas_file, csv_file):
    """
    Convert a SAS file to a CSV file.
    
    Parameters:
    sas_file (str): Path to the input SAS file.
    csv_file (str): Path to the output CSV file.
    """
    try:
        # Read the SAS file into a pandas DataFrame
        df = pd.read_sas(sas_file, encoding='latin-1')

        return df

    except FileNotFoundError:
        print(f"Error: File not found: {sas_file}")
    except Exception as e:
        print(f"An error occurred: {e}")


In [12]:
def transform_data(df):
    """
    Transform the DataFrame by renaming columns and changing data types.
    
    Parameters:
    df (DataFrame): The DataFrame to transform.
    
    Returns:
    DataFrame: The transformed DataFrame.
    """
    try:
        # 1. Convert SAS date column to datetime

        # If activation_date is not of datetime64 type then convert activation_date from SAS integer to datetime
        if not pd.api.types.is_datetime64_any_dtype( df['activation_date'] ):
            # Convert SAS date to datetime
            # SAS dates are the number of days since January 1, 1960
            # The origin is set to '1960-01-01' and unit is 'D' for days
            df['activation_date'] = pd.to_datetime(df['activation_date'], unit='D', origin='1960-01-01')

        # 2. Change date format

        # Change date format to 'dd/mm/yy'
        df['contdate']          = df['contdate'].dt.strftime('%d/%m/%y')
        df['activation_date']   = df['activation_date'].dt.strftime('%d/%m/%y')
        df['deactivation_date'] = df['deactivation_date'].dt.strftime('%d/%m/%y')

        # 3. Convert SAS time to HH:MM:SS format
        # SAS time is the number of seconds since midnight
        hours   = int(df['sc_start_time'] // 3600)
        minutes = int((df['sc_start_time'] % 3600) // 60)
        seconds = int(df['sc_start_time'] % 60)

        df['sc_start_time'] = f"{hours:02}:{minutes:02}:{seconds:02}"      

        # 4. Change code columns to integers
        
        # Change the data type "code" columns from float to integer
        df['pt_employment_status_code'] = df['pt_employment_status_code'].astype('Int64')
        df['pt_ethnicity_code']         = df['pt_ethnicity_code'].astype('Int64')
        df['sc_legal_status']           = df['sc_legal_status'].astype('Int64')
        df['pt_marital_status_code']    = df['pt_marital_status_code'].astype('Int64')
        df['sc_client_present']         = df['sc_client_present'].astype('Int64')
        df['sc_associate_present']      = df['sc_associate_present'].astype('Int64')
        df['org_code']                  = df['org_code'].astype('Int64')
        df['pt_residential_postcode']   = df['pt_residential_postcode'].astype('Int64')
        df['SA2_MAINCODE']              = df['SA2_MAINCODE'].astype('Int64')
        df['program_code']              = df['program_code'].astype('Int64')
        df['district_code']             = df['district_code'].astype('Int64')
        df['pt_sex_code']               = df['pt_sex_code'].astype('Int64')
        df['stream_code']               = df['stream_code'].astype('Int64')
        df['referral_id']               = df['referral_id'].astype('Int64')

        return df

    except Exception as e:
        print(f"An error occurred during data transformation: {e}")
        return df

In [13]:
def export_dataframe_to_csv(df, csv_file_name):
    """
    Export a DataFrame to a CSV file.
    
    Parameters:
    df (DataFrame)     : The DataFrame to export.
    csv_file_name (str): Path to the output CSV file.
    """
    try:
        # Write the DataFrame to a CSV file
        df.to_csv(csv_file_name, index=False, quoting=csv.QUOTE_NONNUMERIC)
        print(f"Successfully exported {csv_file_name}")
    except Exception as e:
        print(f"An error occurred while exporting to CSV: {e}")

## 2. Process SAS bdat file data

In [14]:
# File paths
path_sas_files = f"C:\\temp\\sas_files"
path_csv_files = f"C:\\temp\\sas_files"

# list of SAS files
lst_sas_files = ["servicecontacts_2425.sas7bdat","servicecontacts_2324.sas7bdat","servicecontacts_2223.sas7bdat","servicecontacts_2122.sas7bdat","servicecontacts_2021.sas7bdat"]
lst_csv_files = ["Service Contacts - currentyear.csv","Service Contacts - year2.csv","Service Contacts - year3.csv","Service Contacts - year4.csv","Service Contacts - year5.csv"]

# Full file paths
lst_full_sas_files = [os.path.join(path_sas_files, file) for file in lst_sas_files]
lst_full_csv_files = [os.path.join(path_csv_files, file) for file in lst_csv_files]



In [15]:
# Main execution flow

# Loop through each SAS file, convert it to a DataFrame, transform the data, and export to CSV
for sas_file, csv_file in zip(lst_full_sas_files, lst_full_csv_files):

    # Print out the current file being processed
    print(f"Processing {sas_file} to {csv_file}")

    # Load SAS file into DataFrame
    df = load_sas_to_dataframe(sas_file, csv_file)
    
    # If DataFrame is successfully loaded, transform and export it
    if df is not None:

        # Transform the DataFrame
        df = transform_data(df)

        # Export the transformed DataFrame to CSV
        export_dataframe_to_csv(df, csv_file)


Processing C:\temp\sas_files\servicecontacts_2425.sas7bdat to C:\temp\sas_files\Service Contacts - currentyear.csv
An error occurred during data transformation: cannot convert the series to <class 'int'>
Successfully exported C:\temp\sas_files\Service Contacts - currentyear.csv
Processing C:\temp\sas_files\servicecontacts_2324.sas7bdat to C:\temp\sas_files\Service Contacts - year2.csv
An error occurred during data transformation: cannot convert the series to <class 'int'>
Successfully exported C:\temp\sas_files\Service Contacts - year2.csv
Processing C:\temp\sas_files\servicecontacts_2223.sas7bdat to C:\temp\sas_files\Service Contacts - year3.csv
An error occurred during data transformation: cannot convert the series to <class 'int'>
Successfully exported C:\temp\sas_files\Service Contacts - year3.csv
Processing C:\temp\sas_files\servicecontacts_2122.sas7bdat to C:\temp\sas_files\Service Contacts - year4.csv
An error occurred during data transformation: cannot convert the series to <cl

## 3. Checks

In [16]:
df.dtypes

pt_age_on_contact                float64
contdate                          object
sc_start_time                    float64
pt_country_of_birth               object
pt_date_of_birth_mmyy             object
sc_duration                      float64
pt_enc_id                         object
pt_employment_status_code        float64
pt_employment_status              object
sc_session_type                   object
pt_residential_health_service     object
HSP                               object
pt_ethnicity_code                float64
pt_ethnicity                      object
sc_legal_status                  float64
pt_marital_status_code           float64
pt_marital_status                 object
sc_medium                         object
sc_client_present                float64
sc_associate_present             float64
se_category                       object
org_code                         float64
org                               object
pt_residential_postcode          float64
pt_residential_s

In [17]:
df.head

<bound method NDFrame.head of          pt_age_on_contact  contdate  sc_start_time pt_country_of_birth  \
0                     40.0  01/12/20        30600.0       Not Specified   
1                    128.0  16/05/21        43380.0       Not Specified   
2                    128.0  13/07/20        55260.0       Not Specified   
3                    128.0  16/12/20        47400.0       Not Specified   
4                    128.0  31/01/21        23280.0       Not Specified   
...                    ...       ...            ...                 ...   
1047196               67.0  22/05/21        63000.0   Western Australia   
1047197                9.0  29/07/20        33360.0   Western Australia   
1047198                9.0  18/08/20        44700.0   Western Australia   
1047199               16.0  09/06/21        43800.0   Western Australia   
1047200               16.0  15/06/21        28800.0   Western Australia   

        pt_date_of_birth_mmyy  sc_duration      pt_enc_id  \
0       