In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.patches as patches
 
import cv2
import os
from os import listdir
import re
import gc
import sys
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm import tqdm
from pprint import pprint
from time import time
import itertools
from skimage import measure 
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import nibabel as nib
from glob import glob

from sklearn.model_selection import train_test_split, StratifiedGroupKFold, GroupKFold
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks

# Models

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [None]:
# Load the competition tables: train.csv and train_bounding_boxes.csv
train_df = pd.read_csv("../input/rsna-2022-cervical-spine-fracture-detection/train.csv")
train_bbox = pd.read_csv("../input/rsna-2022-cervical-spine-fracture-detection/train_bounding_boxes.csv")

# Report the shape of each table, followed by a blank separator line
for _label, _frame in (('train shape:', train_df), ('train bbox shape:', train_bbox)):
    print(_label, _frame.shape)
print('')

# Preview the first three rows of train_df
train_df.head(3)

In [None]:
# Store segmentation paths in a dataframe
base_path = "../input/rsna-2022-cervical-spine-fracture-detection"
seg_paths = glob(f"{base_path}/segmentations/*")
seg_df = pd.DataFrame({'path': seg_paths})
seg_df['StudyInstanceUID'] = seg_df['path'].apply(lambda x:x.split('/')[-1][:-4])
seg_df = seg_df[['StudyInstanceUID','path']]
print('seg_df shape:', seg_df.shape)
seg_df.head(3)

In [None]:
# Import the metadata dataset
meta_train = pd.read_csv("../input/rsna-2022-spine-fracture-detection-metadata/meta_train_clean.csv")

# Keep only the rows whose study id also appears in seg_df (i.e. has a segmentation file)
has_segmentation = meta_train['StudyInstanceUID'].isin(seg_df['StudyInstanceUID'])
meta_seg = meta_train.loc[has_segmentation].reset_index(drop=True)
print('meta_seg shape:', meta_seg.shape)
meta_seg.head(3)

In [None]:
# Example: load a single segmentation volume
ex_path = "../input/rsna-2022-cervical-spine-fracture-detection/segmentations/1.2.826.0.1.3680043.12281.nii"
example = nib.load(ex_path).get_fdata()  # image object -> numpy array
# Reverse the last two axes, then reverse the axis order, to align orientation with the train image
example = np.transpose(example[:, ::-1, ::-1], (2, 1, 0))
# Distinct values present at index 119 along the first axis
np.unique(example[119])

Interpretation of the label values in the segmentation mask:
* 0 ---> background
* 2 ---> C2

In [None]:
# Show index 119 of the example segmentation volume with a colour scale
fig, ax = plt.subplots()
im = ax.imshow(example[119])
ax.set_title('Segmentation example')
fig.colorbar(im, ax=ax)
ax.axis('off')
plt.show()

In [None]:
# Names of the seven target columns, C1 through C7
targets = [f'C{level}' for level in range(1, 8)]
# Add every target column to meta_seg, initialised to 0
meta_seg[targets] = 0
meta_seg

In [None]:
# Replace meta_seg with the table stored in meta_segmentation.csv
seg_meta_csv = '../input/rsna-2022-spine-fracture-detection-metadata/meta_segmentation.csv'
meta_seg = pd.read_csv(seg_meta_csv)
meta_seg.head(3)

In [None]:
# Example: study id, slice number and target columns for row positions 199-203
example_cols = ['StudyInstanceUID', 'Slice'] + targets
meta_seg.iloc[199:204][example_cols]

In [None]:
# Print example of extracted vertebrae: target columns of the first study, index labels 110 to 340
first_uid = meta_seg['StudyInstanceUID'].unique()[0]
print('UID:', first_uid)
# Raise the display limit so the selected rows are not truncated
pd.set_option('display.max_rows', 500)
first_study = meta_seg[meta_seg['StudyInstanceUID'] == first_uid]
first_study.loc[110:340, targets]

In [None]:
# Slice ratio: each row's slice number divided by the largest slice number
# of its study (to generalise better)
slice_max_seg = meta_seg.groupby('StudyInstanceUID')['Slice'].max().to_dict()
max_slice_per_row = meta_seg['StudyInstanceUID'].map(slice_max_seg)
meta_seg['SliceRatio'] = meta_seg['Slice'] / max_slice_per_row

In [None]:
# Metadata columns used as inputs to a simple classifier
features = [
    'SliceRatio',
    'SliceThickness',
    'ImagePositionPatient_x',
    'ImagePositionPatient_y',
    'ImagePositionPatient_z',
]
# Features (with the study id kept for the grouped split) and targets
id_and_feature_cols = ['StudyInstanceUID'] + features
X = meta_seg[id_and_feature_cols]
y = meta_seg[targets]

In [None]:
# Train-valid split, grouped by patient: first fold of a 5-fold GroupKFold (80/20 split)
gkf = GroupKFold(n_splits=5)
fold_iter = gkf.split(X, y, groups=X['StudyInstanceUID'])
train_idx, valid_idx = next(fold_iter)

# Train set, with the patient id column dropped
X_train = X.iloc[train_idx].drop(columns='StudyInstanceUID')
y_train = y.iloc[train_idx]

# Validation set, with the patient id column dropped
X_valid = X.iloc[valid_idx].drop(columns='StudyInstanceUID')
y_valid = y.iloc[valid_idx]

In [None]:
# Train classifier
# random_state is fixed so the fitted forest, and every label later predicted
# from it, is identical on each "Restart & Run All"; n_jobs=-1 only parallelises
# the fit across the available cores and does not change the result.
clf = RandomForestClassifier(random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)

In [None]:
# Evaluate model on the validation split
y_preds = clf.predict(X_valid)

# Per-target accuracy is the fraction of validation rows whose prediction
# matches the label; total_acc is the unweighted mean over the 7 targets.
total_acc = 0
for i in range(7):
    acc = (y_valid[f'C{i+1}'] == y_preds[:, i]).mean()
    total_acc += acc / 7
    # acc is a fraction in [0, 1]; the '%' format spec scales it by 100 so
    # the printed number really is a percentage
    print(f'Accuracy of C{i+1}: {acc:.2%}')

print('')
print(f'Overall accuracy: {total_acc:.2%}')

In [None]:
# Feature importances of the fitted classifier, largest first
importance_df = pd.DataFrame({'Feature': features, 'Importance': clf.feature_importances_})
importance_df.sort_values(by='Importance', ascending=False)

In [None]:
# Predictions of the fitted classifier for the validation features
# (the same call that produces y_preds in the evaluation cell).
preds = clf.predict(X_valid)


In [None]:
# Print arrays in full from here on (no summarisation with "...")
np.set_printoptions(threshold=np.inf)
# Predict for every row of X (study id column removed) and show row positions 110-249
all_seg_preds = clf.predict(X.drop(columns='StudyInstanceUID'))
all_seg_preds[110:250, :]

In [None]:
# Read in metadata for entire train set
meta_train = pd.read_csv('../input/rsna-2022-spine-fracture-detection-metadata/meta_train_clean.csv')

# Calculate slice ratio (to generalise better): slice number divided by the
# largest slice number of the same study
slice_max_train = meta_train.groupby('StudyInstanceUID')['Slice'].max().to_dict()
meta_train['SliceRatio'] = meta_train['Slice']/meta_train['StudyInstanceUID'].map(slice_max_train)

# Initialise targets
meta_train[targets]=0

# Predict targets for entire train set
meta_train[targets] = clf.predict(meta_train[features])

# Images with segmentations have known targets, so put those back in place of
# the predictions. Rows are matched on (StudyInstanceUID, Slice) rather than by
# position: meta_seg and meta_train come from two separately loaded CSV files,
# so a positional copy would silently mislabel rows if their order differed and
# would raise if their row counts differed.
key_cols = ['StudyInstanceUID', 'Slice']
seg_labels = meta_seg.drop_duplicates(subset=key_cols).set_index(key_cols)[targets]
train_keys = pd.MultiIndex.from_frame(meta_train[key_cols])
has_seg = train_keys.isin(seg_labels.index)
if has_seg.any():
    # reindex returns the known labels in the order of the selected meta_train rows
    meta_train.loc[has_seg, targets] = seg_labels.reindex(train_keys[has_seg]).to_numpy()

 

In [None]:
# Display train.csv (the same file that the loading cell reads into train_df).
pd.read_csv("../input/rsna-2022-cervical-spine-fracture-detection/train.csv")

In [None]:
# Per-slice table: study id, slice number and the target columns
slice_label_cols = ['StudyInstanceUID', "Slice"] + targets
meta_df = meta_train[slice_label_cols]
# One row per study: first() keeps, for each column, the first non-null value within the study
# NOTE(review): this takes values from the first row of each study, not an aggregate over slices - confirm that is intended
tr_df = meta_df.groupby("StudyInstanceUID").first().reset_index()

In [None]:
# Write the per-study table to grouped_tr_df.csv in the working directory, without the index column.
tr_df.to_csv("grouped_tr_df.csv",index=False)

In [None]:
# Write the per-slice table to all_slices_data.csv in the working directory, without the index column.
meta_df.to_csv("all_slices_data.csv", index=False)