# WiDS Datathon 2025: Brain Imaging Analysis
## Data Exploration Notebook

This notebook explores the brain imaging data for predicting ADHD diagnosis and biological sex.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nibabel as nib
from nilearn import plotting

%matplotlib inline


In [3]:
# Preview the functional connectome matrices.
# nrows=5 restricts the read to the first five rows because the full file is large.
connectome_data = pd.read_csv(
    'TRAIN/TRAIN_FUNCTIONAL_CONNECTOME_MATRICES.csv',
    nrows=5,
)

# The shape below describes this 5-row preview, not the whole file.
print("Dataset Shape:", connectome_data.shape)
# Only the leading five column names are shown to keep the output short.
print("\nFirst few columns:", connectome_data.columns[:5], sep="\n")

Dataset Shape: (5, 19901)

First few columns:
Index(['participant_id', '0throw_1thcolumn', '0throw_2thcolumn',
       '0throw_3thcolumn', '0throw_4thcolumn'],
      dtype='object')


In [5]:
# Show the column names at both ends of the preview table.
# Each heading and its Index are emitted by a single print call, one per line.
print("First 5 columns:", connectome_data.columns[:5], sep="\n")
print("\nLast 5 columns:", connectome_data.columns[-5:], sep="\n")

First 5 columns:
Index(['participant_id', '0throw_1thcolumn', '0throw_2thcolumn',
       '0throw_3thcolumn', '0throw_4thcolumn'],
      dtype='object')

Last 5 columns:
Index(['196throw_198thcolumn', '196throw_199thcolumn', '197throw_198thcolumn',
       '197throw_199thcolumn', '198throw_199thcolumn'],
      dtype='object')


In [6]:
# Four-part summary of the loaded preview; every section is a heading
# followed by a 50-character dashed rule.

# Section 1: overall dimensions of the frame currently in memory.
print("1. Basic Dataset Information:", "-" * 50, sep="\n")
print(f"Total number of columns: {len(connectome_data.columns)}")
print(f"Number of rows (subjects): {len(connectome_data)}")

# Section 2: the five leading column names as a plain Python list.
print("\n2. First Few Column Names:", "-" * 50, sep="\n")
print(list(connectome_data.columns[:5]))

# Section 3: the five trailing column names as a plain Python list.
print("\n3. Last Few Column Names:", "-" * 50, sep="\n")
print(list(connectome_data.columns[-5:]))

# Section 4: the first five values of the first row.
print("\n4. First Row Preview:", "-" * 50, sep="\n")
print(connectome_data.iloc[0].iloc[:5])

1. Basic Dataset Information:
--------------------------------------------------
Total number of columns: 19901
Number of rows (subjects): 5

2. First Few Column Names:
--------------------------------------------------
['participant_id', '0throw_1thcolumn', '0throw_2thcolumn', '0throw_3thcolumn', '0throw_4thcolumn']

3. Last Few Column Names:
--------------------------------------------------
['196throw_198thcolumn', '196throw_199thcolumn', '197throw_198thcolumn', '197throw_199thcolumn', '198throw_199thcolumn']

4. First Row Preview:
--------------------------------------------------
participant_id      70z8Q2xdTXM3
0throw_1thcolumn        0.093473
0throw_2thcolumn        0.146902
0throw_3thcolumn        0.067893
0throw_4thcolumn        0.015141
Name: 0, dtype: object
