In [1]:
# Prints a fixed greeting; presumably a smoke test that the kernel executes cells.
print("hello world")

hello world


In [2]:
import os, glob, cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# All dataset locations are resolved relative to the current working directory.
DATA_DIR = os.getcwd()

# Image folders, one per split.
TRAIN_IMG_DIR, TEST_IMG_DIR = (
    os.path.join(DATA_DIR, folder) for folder in ("training_set", "test_set")
)

# CSV tables, one per split.
TRAIN_CSV, TEST_CSV = (
    os.path.join(DATA_DIR, table)
    for table in ("training_set_pixel_size_and_HC.csv", "test_set_pixel_size.csv")
)

# Load both tables into DataFrames for the cells below.
train_df, test_df = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)

def pick_col(df, keys):
    """Return the first column of ``df`` whose name contains any of ``keys``.

    Columns are scanned in their order in ``df.columns``. Each column name is
    converted to ``str`` and lower-cased before the substring test; the keys
    themselves are used as given, so they should be passed in lower case.

    Returns:
        The matching column label (unchanged, not lower-cased), or ``None``
        when no column name contains any key.
    """
    matching = (
        col
        for col in df.columns
        if any(key in str(col).lower() for key in keys)
    )
    return next(matching, None)

# Detect the relevant columns by keyword instead of hard-coding header names.
fname_col_train = pick_col(train_df, ["file", "name", "image"])
fname_col_test  = pick_col(test_df,  ["file", "name", "image"])
hc_col = pick_col(train_df, ["hc", "circumference"])

# pick_col returns None when nothing matches. Fail here, listing the headers
# that are actually present, rather than letting a later row[None] lookup
# raise an unexplained KeyError deep inside the loading loops.
_undetected = [
    label
    for label, col in (
        ("train filename", fname_col_train),
        ("test filename", fname_col_test),
        ("train HC", hc_col),
    )
    if col is None
]
if _undetected:
    raise KeyError(
        f"Could not detect column(s): {', '.join(_undetected)}. "
        f"Train columns: {list(train_df.columns)}; "
        f"test columns: {list(test_df.columns)}"
    )

print("Detected:", fname_col_train, hc_col, fname_col_test)
print(train_df.head())


Detected: filename head circumference (mm) filename
     filename  pixel size(mm)  head circumference (mm)
0  000_HC.png        0.069136                    44.30
1  001_HC.png        0.089659                    56.81
2  002_HC.png        0.062033                    68.75
3  003_HC.png        0.091291                    69.00
4  004_HC.png        0.061240                    59.81


In [3]:
def find_image_path(img_dir, fname):
    """Resolve a filename entry to an existing image file inside ``img_dir``.

    The entry is converted to ``str`` and stripped of surrounding whitespace
    and quote characters. The following candidates are then tried in order:

    1. the entry exactly as given,
    2. its stem with a ``.png`` / ``.PNG`` extension,
    3. its stem left-padded with zeros to three characters, with
       ``.png`` / ``.PNG``,
    4. any file named ``<stem>.<anything>`` (first match in sorted order).

    Args:
        img_dir: Directory that is searched (not recursively).
        fname: Filename entry; may carry stray quotes/whitespace, lack an
            extension, or be ``None`` / NaN for a missing value.

    Returns:
        The path of the first candidate that is a regular file, or ``None``
        if the entry is missing/empty or nothing matches.
    """
    # Missing values would otherwise be stringified to "None" / "nan" and
    # searched for as if they were real filenames.
    if fname is None or (isinstance(fname, float) and fname != fname):
        return None

    s = str(fname).strip().strip('"').strip("'")
    if not s:
        # An empty name would make os.path.join(img_dir, "") point at the
        # directory itself, and "".zfill(3) would wrongly match "000.png".
        return None

    base = os.path.splitext(s)[0]
    candidates = [
        os.path.join(img_dir, s),
        os.path.join(img_dir, base + ".png"),
        os.path.join(img_dir, base + ".PNG"),
        os.path.join(img_dir, base.zfill(3) + ".png"),
        os.path.join(img_dir, base.zfill(3) + ".PNG"),
    ]
    for p in candidates:
        # isfile (not exists) so that a sub-directory is never returned.
        if os.path.isfile(p):
            return p

    # Fallback: any extension. Escape glob metacharacters so names such as
    # "img[1]" are matched literally, and sort because glob order is arbitrary.
    pattern = os.path.join(glob.escape(img_dir), glob.escape(base) + ".*")
    hits = sorted(h for h in glob.glob(pattern) if os.path.isfile(h))
    return hits[0] if hits else None

# Side length (pixels) every image is resized to; also the model input size.
IMG_SIZE = 224

X, y = [], []
used_names = []
# (filename, reason) for every CSV row that could not be turned into a sample,
# so that dropped data is reported instead of disappearing silently.
skipped_train = []

for _, row in train_df.iterrows():
    fname = row[fname_col_train]
    img_path = find_image_path(TRAIN_IMG_DIR, fname)
    if img_path is None:
        skipped_train.append((fname, "image file not found"))
        continue
    if pd.isna(row[hc_col]):
        skipped_train.append((fname, "missing target value"))
        continue
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None.
        skipped_train.append((fname, "unreadable image"))
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # Scale 8-bit intensities to [0, 1].
    img = img.astype(np.float32) / 255.0
    X.append(img)
    y.append(float(row[hc_col]))
    used_names.append(fname)

if skipped_train:
    print(
        f"WARNING: skipped {len(skipped_train)} of {len(train_df)} training rows; "
        f"first few: {skipped_train[:5]}"
    )
if not X:
    # Without this, the failure surfaces one step later inside
    # train_test_split with a message that does not mention the image folder.
    raise RuntimeError(
        f"No training images could be loaded from {TRAIN_IMG_DIR!r}; "
        "check the folder and the filename column."
    )

# Add the trailing channel axis expected by the network input.
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y = np.array(y).astype(np.float32)

print("Train samples:", len(X))
print("X shape:", X.shape, "y shape:", y.shape)

# Fixed random_state keeps the 80/20 train/validation split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


Train samples: 999
X shape: (999, 224, 224, 1) y shape: (999,)


In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models

def to_rgb(x):
    """Convert a grayscale image tensor to RGB with ``tf.image.grayscale_to_rgb``.

    Used as the body of a ``Lambda`` layer so the single-channel model input
    can be fed to a backbone that was built for 3-channel images.
    """
    rgb = tf.image.grayscale_to_rgb(x)
    return rgb

# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that the new head's initial weights, the dropout masks and the batch
# shuffling are repeatable across "Restart & Run All". (Some GPU kernels may
# still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

# ImageNet-pretrained ResNet50 without its classification head.
base = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

# Freeze the backbone: only the regression head defined below is trained.
base.trainable = False

inp = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 1))
x = layers.Lambda(to_rgb)(inp)
# Inputs were scaled to [0, 1] when loaded; multiply back by 255 before the
# ResNet preprocessing step.
x = tf.keras.applications.resnet.preprocess_input(x * 255.0)
# training=False keeps the frozen backbone in inference mode.
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.3)(x)
# Single linear output unit: the regression target.
out = layers.Dense(1)(x)

model = models.Model(inp, out)

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="mae",
    metrics=["mae"]
)

# val_mae is an error, so lower is better; state mode="min" explicitly rather
# than relying on the callbacks' name-based "auto" inference.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_mae", mode="min", patience=5, restore_best_weights=True
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_mae", mode="min", factor=0.5, patience=2, min_lr=1e-6
    )
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=8,
    callbacks=callbacks
)


Epoch 1/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 297ms/step - loss: 49.8036 - mae: 49.8036 - val_loss: 25.8024 - val_mae: 25.8024 - learning_rate: 0.0010
Epoch 2/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 325ms/step - loss: 25.5319 - mae: 25.5319 - val_loss: 19.8952 - val_mae: 19.8952 - learning_rate: 0.0010
Epoch 3/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 287ms/step - loss: 23.8263 - mae: 23.8263 - val_loss: 22.4174 - val_mae: 22.4174 - learning_rate: 0.0010
Epoch 4/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 283ms/step - loss: 21.5931 - mae: 21.5931 - val_loss: 19.0175 - val_mae: 19.0175 - learning_rate: 0.0010
Epoch 5/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 271ms/step - loss: 20.7875 - mae: 20.7875 - val_loss: 20.7611 - val_mae: 20.7611 - learning_rate: 0.0010
Epoch 6/30
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 269ms/s

In [13]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Predict on the held-out split and flatten the result to 1-D so it lines up
# with y_val.
y_val_pred = model.predict(X_val).reshape(-1)

# Mean absolute error, root-mean-squared error and R^2 on the validation split.
mae, rmse, r2 = (
    mean_absolute_error(y_val, y_val_pred),
    np.sqrt(mean_squared_error(y_val, y_val_pred)),
    r2_score(y_val, y_val_pred),
)

for label, value in (("MAE :", mae), ("RMSE:", rmse), ("R2  :", r2)):
    print(label, value)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 998ms/step
MAE : 15.34813117980957
RMSE: 20.63940222769076
R2  : 0.8944050669670105


In [14]:
X_test = []
test_names = []
# (filename, reason) for every test row that produced no prediction; a short
# submission file should never go unnoticed.
skipped_test = []

for _, row in test_df.iterrows():
    fname = row[fname_col_test]
    img_path = find_image_path(TEST_IMG_DIR, fname)
    if img_path is None:
        skipped_test.append((fname, "image file not found"))
        continue
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None.
        skipped_test.append((fname, "unreadable image"))
        continue
    # Same resize and [0, 1] scaling as applied to the training images.
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img.astype(np.float32) / 255.0
    X_test.append(img)
    test_names.append(fname)

if skipped_test:
    print(
        f"WARNING: skipped {len(skipped_test)} of {len(test_df)} test rows, so the "
        f"submission will contain only {len(test_names)} rows; "
        f"first few: {skipped_test[:5]}"
    )

# Add the trailing channel axis expected by the network input.
X_test = np.array(X_test).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

pred = model.predict(X_test).reshape(-1)

# One row per successfully loaded test image.
submit = pd.DataFrame({"filename": test_names, "HC": pred})
submit.to_csv(os.path.join(DATA_DIR, "submission_prac2.csv"), index=False)

print(submit.head())


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 734ms/step
     filename          HC
0  000_HC.png  271.677765
1  001_HC.png   71.543472
2  002_HC.png  191.071472
3  003_HC.png  154.983185
4  004_HC.png  294.123962
