# Trajectory of Outcomes After A Distal Radius Fracture
A python version of the R analysis report.

## Environment Setup
First, install the dependencies:  
`pip install pandas numpy seaborn statsmodels plotly scikit-learn`

In [None]:
# Packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.regression.mixed_linear_model import MixedLM
import plotly.express as px
import plotly.graph_objects as go

# Set plotting style
sns.set_theme()

print("All dependencies are ready")

所有依赖库已就绪


# Data Import

In [31]:
# Read the CSV file with index_col=0 to use the first column as index
c1 = pd.read_csv("../data/data.csv", index_col=0)
c1 = c1.reset_index().rename(columns={'index': 'X'})  # Convert index to column named 'X'
c1.head()

Unnamed: 0,X,MRN,Age at injury,Sex,ISS,CAD,Hypertension,Osteoporosis,Diabetes,Substance abuse,...,SpecificActivities_5Y,Total_5Y,UsualActivities_5Y,Procedure,admission date,Discharge,Procedure date,Aditional procedures,Revision date,Revision procedure
0,1,15390,79,F,9,,,,,,...,,,,ORIF,2006-08-05,2006-08-08,2006-08-07,,none,none
1,2,36020,63,F,9,Present,Present,,,,...,,,,ORIF,2013-01-10,2013-01-10,2013-01-10,,41305,Revision of IF device
2,3,45377,58,F,9,,,,,,...,,,,ORIF,2009-11-24,2009-11-26,2009-11-25,,none,none
3,4,74420,77,F,9,,Present,,,,...,0.0,5.0,10.0,ORIF,2006-04-03,2006-04-06,2006-04-05,,none,none
4,5,120632,63,F,9,,,,,,...,1.0,6.5,0.0,ORIF,2011-11-01,2011-11-01,2011-11-01,,none,none


In [34]:
# If the column name has space, replace it with underscore.
# If the column name has special character, replace it with underscore.
c1.columns = c1.columns.str.replace(' ', '_')
c1.columns = c1.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)
c1.head()

Unnamed: 0,X,MRN,Age_at_injury,Sex,ISS,CAD,Hypertension,Osteoporosis,Diabetes,Substance_abuse,...,SpecificActivities_5Y,Total_5Y,UsualActivities_5Y,Procedure,admission_date,Discharge,Procedure_date,Aditional_procedures,Revision_date,Revision_procedure
0,1,15390,79,F,9,,,,,,...,,,,ORIF,2006-08-05,2006-08-08,2006-08-07,,none,none
1,2,36020,63,F,9,Present,Present,,,,...,,,,ORIF,2013-01-10,2013-01-10,2013-01-10,,41305,Revision of IF device
2,3,45377,58,F,9,,,,,,...,,,,ORIF,2009-11-24,2009-11-26,2009-11-25,,none,none
3,4,74420,77,F,9,,Present,,,,...,0.0,5.0,10.0,ORIF,2006-04-03,2006-04-06,2006-04-05,,none,none
4,5,120632,63,F,9,,,,,,...,1.0,6.5,0.0,ORIF,2011-11-01,2011-11-01,2011-11-01,,none,none


In [39]:
# Convert binary categories to 0/1
binary_cols = ['Sex', 'CAD', 'Hypertension', 'Osteoporosis', 'Diabetes', 
               'Substance_abuse', 'Alcohol_abuse', 'Depression', 'Anxiety_disorder',
               'Psychosis', 'Malignancy', 'Stroke_TIA', 'Previous_orthopedic_trauma']

# Convert Sex F->0, M->1, and others None->0, Present->1
c1['Sex'] = (c1['Sex'] == 'M').astype(int)
for col in binary_cols[1:]:
    c1[col] = (c1[col] != 'None').astype(int)
# Special case for Revision_procedure
c1['Revision_procedure'] = (c1['Revision_procedure'] == 'Removal of  device').astype(int)

# Check the data
c1.head()

Unnamed: 0,X,MRN,Age_at_injury,Sex,ISS,CAD,Hypertension,Osteoporosis,Diabetes,Substance_abuse,...,SpecificActivities_5Y,Total_5Y,UsualActivities_5Y,Procedure,admission_date,Discharge,Procedure_date,Aditional_procedures,Revision_date,Revision_procedure
0,1,15390,79,0,9,1,1,1,1,1,...,,,,ORIF,2006-08-05,2006-08-08,2006-08-07,,none,0
1,2,36020,63,0,9,1,1,1,1,1,...,,,,ORIF,2013-01-10,2013-01-10,2013-01-10,,41305,0
2,3,45377,58,0,9,1,1,1,1,1,...,,,,ORIF,2009-11-24,2009-11-26,2009-11-25,,none,0
3,4,74420,77,0,9,1,1,1,1,1,...,0.0,5.0,10.0,ORIF,2006-04-03,2006-04-06,2006-04-05,,none,0
4,5,120632,63,0,9,1,1,1,1,1,...,1.0,6.5,0.0,ORIF,2011-11-01,2011-11-01,2011-11-01,,none,0


# Data Clean

In [9]:
# Remove rows where all Total scores are NA or 0
total_cols = ['Total_3M', 'Total_6M', 'Total_1Y', 'Total_5Y']
c1 = c1[~(c1[total_cols].isna().all(axis=1) | (c1[total_cols] == 0).all(axis=1))]

In [10]:
# Data for LME
# Filter for at least two non-NA data points
c3 = c1[c1[total_cols].isna().sum(axis=1) <= 2].copy()

# Create easyc3 with regrouped attributes
cols_to_keep = ['X', 'MRN', 'Age_at_injury', 'Sex', 'ISS', 'CAD', 'Hypertension', 
                'Osteoporosis', 'Diabetes', 'Substance_abuse', 'Alcohol_abuse',
                'Depression', 'Anxiety_disorder', 'Psychosis', 'Malignancy', 
                'Stroke_TIA', 'Previous_orthopedic_trauma', 'Revision_procedure',
                'Function_Baseline', 'Pain_Baseline', 'Total_Baseline',
                'Function_3M', 'Pain_3M', 'Total_3M', 'Function_6M', 'Pain_6M', 
                'Total_6M', 'Function_1Y', 'Pain_1Y', 'Total_1Y',
                'Function_5Y', 'Pain_5Y', 'Total_5Y']

easyc3 = c3[cols_to_keep].copy()

In [11]:
# Create combined categories
easyc3['SubAbuse'] = ((easyc3['Substance_abuse'] + easyc3['Alcohol_abuse']) != 0).astype(int)
easyc3['Mental_illness'] = ((easyc3['Depression'] + easyc3['Anxiety_disorder'] + 
                            easyc3['Psychosis']) != 0).astype(int)


In [12]:
# Select final columns for easyc3
final_cols = ['X', 'MRN', 'Age_at_injury', 'Sex', 'ISS', 'CAD', 'Hypertension',
              'Osteoporosis', 'Diabetes', 'SubAbuse', 'Mental_illness',
              'Malignancy', 'Stroke_TIA', 'Previous_orthopedic_trauma', 
              'Revision_procedure', 'Function_Baseline', 'Pain_Baseline', 
              'Total_Baseline'] + [col for col in easyc3.columns 
                                  if any(x in col for x in ['_3M', '_6M', '_1Y', '_5Y'])]

easyc3 = easyc3[final_cols]

In [14]:
# Create long format data
id_vars = ['X', 'MRN', 'Age_at_injury', 'Sex', 'ISS', 'CAD', 'Hypertension',
           'Osteoporosis', 'Diabetes', 'SubAbuse', 'Mental_illness',
           'Malignancy', 'Stroke_TIA', 'Previous_orthopedic_trauma',
           'Revision_procedure', 'Function_Baseline', 'Pain_Baseline', 'Total_Baseline']

longc3 = pd.wide_to_long(easyc3, 
                        stubnames=['Function', 'Pain', 'Total'],
                        i=id_vars,
                        j='period',
                        suffix='_(3M|6M|1Y|5Y)',
                        sep='').reset_index()


In [15]:
# Convert period to months
period_to_months = {'3M': 3, '6M': 6, '1Y': 12, '5Y': 60}
longc3['month'] = longc3['period'].map(period_to_months)


In [16]:
# Create lme_longc3 by dropping NA values
lme_longc3 = longc3.dropna(subset=['Total'])


In [18]:
# LME Plot function
def plot_individual_slopes():
    fig = plt.figure(figsize=(10, 20))
    
    # Fit individual linear models for each subject
    subjects = lme_longc3['X'].unique()
    coefficients = []
    
    for subject in subjects:
        subject_data = lme_longc3[lme_longc3['X'] == subject]
        if len(subject_data) >= 2:  # Need at least 2 points for regression
            X = subject_data['month'].values.reshape(-1, 1)
            y = subject_data['Total'].values
            try:
                slope, intercept = np.polyfit(X.ravel(), y, 1)
                coefficients.append({'subject': subject, 'slope': slope, 
                                   'intercept': intercept})
            except:
                continue
      # Plot individual regression lines
    coef_df = pd.DataFrame(coefficients)
    
    plt.scatter(coef_df['intercept'], coef_df['slope'], alpha=0.5)
    plt.xlabel('Intercept')
    plt.ylabel('Slope')
    plt.title('Individual Regression Coefficients')
    
    return plt  

In [19]:
# Create the plot
lme_plot = plot_individual_slopes()
plt.show()

** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  5 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  5 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number  4 had an illegal value
** On entry to DLASCL, parameter number 

KeyError: 'intercept'

<Figure size 1000x2000 with 0 Axes>