# SPINE PROBLEMS

## 0 IMPORT LIBRARIES

verify pytorch

In [1]:
import cv2
import numpy as np
import polars as pl
from sklearn.model_selection import KFold, train_test_split
import gc
import importlib
import os
import random
import shutil
import subprocess
from typing import Any, Callable
import pydicom


## 1 GLOBAL SETTINGS

In [2]:
# Project root relative to the notebook; switch to "../" when running from a subfolder.
WORKDIR_PATH = ""  # "../"

# Target size handed to the image resizer during preprocessing.
# NOTE(review): 244 may be a typo for the common 224 input size — confirm.
IMAGE_DIM = 244

# Starts as True; the dataset-loading cell flips it to False when a cached
# preprocessed parquet file is found.
NO_CACHE = True

# Directory layout, all derived from WORKDIR_PATH.
CACHE_DIR = f"{WORKDIR_PATH}.cache/"
DATASET_DIR = f"{WORKDIR_PATH}dataset/"
ANOTTATIONS_DIR = f"{DATASET_DIR}annotations/"
TRAIN_DIR = f"{DATASET_DIR}train_images/"

LOAD_SESSION = False
RESOURCES = "resources/"

## 2 Preprocessing

### 2.1 Functions to preprocess the data

In [None]:
from utils import image_resizer
from utils import image_grayscaler
from utils import image_normalization
from utils import load_image

def preprocess(path:str) -> np.ndarray:
    """
    Load a DICOM image and run it through the preprocessing pipeline.

    The pipeline is: load -> resize to IMAGE_DIM -> grayscale -> normalize.

    Args:
        path (str): File path to the input DICOM image

    Returns:
        np.ndarray: The image after resizing, grayscale conversion, and normalization.
        Returns None when the loader does not produce a NumPy array
        (for example when it returns None for an unreadable file).
    """
    image = load_image.load_dicom_image(path)
    # The previous check `image is not np.ndarray` compared the loaded object
    # with the ndarray *class* itself, which is always true, so every image
    # was discarded. An instance check is what was intended.
    if not isinstance(image, np.ndarray):
        return None
    image = image_resizer.resize_image(image, IMAGE_DIM)
    image = image_grayscaler.grayscaler(image)
    image = image_normalization.normalize_image(image)
    return image

### 2.2 Load Dataset

In [4]:
from utils import visualice

# Prefer the cached, already-preprocessed dataset; otherwise start from the raw
# annotations CSV and let the following cells build the preprocessed frame.
preprocessed_cache_path = CACHE_DIR + "preprocesed_dataset.parquet"
if os.path.exists(preprocessed_cache_path):
    # scan_parquet yields a LazyFrame directly, instead of reading the whole
    # file into memory and wrapping it with .lazy() afterwards.
    lazy_dataset = pl.scan_parquet(preprocessed_cache_path)
    # Signals the later cells to skip the preprocessing steps.
    NO_CACHE = False
else:
    lazy_dataset = pl.scan_csv(ANOTTATIONS_DIR + 'p.csv')
visualice.visualice_lazyframe(lazy_dataset)

lesion_type,image
str,list[list[f32]]
"""Osteophytes""","[[0.038441, 0.038441, … 0.267448], [0.038441, 0.038441, … 0.221101], … [0.034351, 0.034351, … 0.727099]]"
"""Disc space narrowing""","[[0.09234, 0.09234, … 0.09234], [0.09234, 0.09234, … 0.09234], … [0.09234, 0.09234, … 0.09234]]"
"""No finding""","[[0.007252, 0.007252, … 0.007252], [0.007252, 0.007252, … 0.007252], … [0.007252, 0.007252, … 0.007252]]"
"""Osteophytes""","[[0.09234, 0.09234, … 0.09234], [0.09234, 0.09234, … 0.09234], … [0.09234, 0.09234, … 0.09234]]"
"""Vertebral collapse""","[[0.122338, 0.134899, … 0.293282], [0.132441, 0.14036, … 0.271163], … [0.027581, 0.027034, … 0.669306]]"


### 2.3 Filter dataset 

In [5]:
from utils import visualice

if NO_CACHE:
    # Only the image identifier and its lesion label are needed downstream.
    lazy_dataset = lazy_dataset.select(['image_id', 'lesion_type'])

### 2.4 Get image path and add to the dataset

In [6]:
if NO_CACHE:
    # Build "<TRAIN_DIR><image_id>.dicom" for every row, then drop the raw id.
    lazy_dataset = lazy_dataset.with_columns(
        image_path=pl.lit(TRAIN_DIR) + pl.col("image_id") + pl.lit('.dicom')
    ).drop('image_id')

### 2.6 Preprocess the data

In [7]:
if NO_CACHE:
    # Apply `preprocess` to each image path, store the result in an "image"
    # column, and drop the path column afterwards.
    lazy_dataset = lazy_dataset.with_columns(
        image=pl.col("image_path").map_elements(
            function=preprocess,
            return_dtype=pl.List(pl.List(pl.Float32)),
        )
    ).drop("image_path")

### 2.7 Save the preprocessed dataset

In [8]:
if NO_CACHE:
    # Materialize the lazy pipeline and persist it so later runs can load the cache.
    os.makedirs(CACHE_DIR, exist_ok=True)
    cache_filename = os.path.join(CACHE_DIR, "preprocesed_dataset.parquet")
    lazy_dataset.collect().write_parquet(cache_filename, compression="snappy")

$$\mathrm{ReLU}(x) = \max(0, x)$$

## AUTOENCODER

<img src="resources/Structure-of-autoencoder-for-feature-extraction.png" alt="Structure of an autoencoder for feature extraction" width="600px" height="300px"/>

We use an autoencoder for dimensionality reduction, in order to 