In [None]:
!pip install numpy scipy




In [None]:
import kagglehub

# Fetch the most recent version of the UCID dataset from Kaggle; `path` is the
# local directory returned by kagglehub and is consumed by the copy cell below.
UCID_KAGGLE_HANDLE = "flamense160/ucid-dataset"
path = kagglehub.dataset_download(UCID_KAGGLE_HANDLE)

print("Path to dataset files:", path)

In [None]:
from google.colab import drive

# Mount at the lower-case '/content/drive' path: the copy, preparation and
# feature-extraction cells all read/write under '/content/drive/My Drive/...',
# and paths are case-sensitive, so mounting at '/content/Drive' would leave
# those cells writing to the ephemeral local disk instead of Google Drive.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/Drive


In [None]:
import shutil
import os

# Flatten the downloaded dataset tree into a single folder: every file found
# anywhere under `path` is copied into `dest_folder` under its base name.
dest_folder = '/content/drive/My Drive/ucid/'
os.makedirs(dest_folder, exist_ok=True)

for dirpath, _subdirs, filenames in os.walk(path):
    for filename in filenames:
        shutil.copy(
            os.path.join(dirpath, filename),
            os.path.join(dest_folder, filename),
        )


In [None]:
import os
import cv2
from PIL import Image
from pathlib import Path

# --- CONFIG ---
UCID_DIR = Path('/content/drive/My Drive/ucid')
OUT_ROOT = Path('/content/drive/My Drive/ucid_dsets')

assert UCID_DIR.exists(), f"{UCID_DIR} not found"

# Post-processing "attacks" applied to each source image before rescaling.
# A value of None means the image is passed through unchanged.
ATTACKS = {
    'mf3': lambda img: cv2.medianBlur(img, 3),
    'mf5': lambda img: cv2.medianBlur(img, 5),
    'avg': lambda img: cv2.blur(img, (3,3)),
    'gau': lambda img: cv2.GaussianBlur(img, (3,3), 0.5),
    'res': lambda img: cv2.resize(
        img,
        (int(img.shape[1]*1.5), int(img.shape[0]*1.5)),
        interpolation=cv2.INTER_CUBIC
    ),
    # NOTE(review): no JPEG-specific processing is applied here, so the 'jpeg'
    # outputs are identical to 'orig'. Confirm the intended JPEG attack/quality.
    'jpeg': None,
    # Lower-case key: the feature-extraction cell reads the untouched images
    # from 'orig/<size>_orig_<variant>', and paths are case-sensitive.
    'orig': None,
}

# target resolutions as (width, height)
SIZES = {
    '512x384': (512, 384),
    '256x256': (256, 256)
}

# compression variants (only the keys are used, to name the output folders)
VARIANTS = {
    'unc': ('png', {'compress_level':0}),   # PNG lossless
    '90': ('jpg', {'quality':90})           # JPEG Q=90
}

# Build folder structure: <OUT_ROOT>/<attack>/<size>_<attack>_<variant>/
for atk in ATTACKS:
    for size in SIZES:
        for var in VARIANTS:
            (OUT_ROOT/atk/f"{size}_{atk}_{var}").mkdir(parents=True, exist_ok=True)

# process images
img_exts = {'.png','.jpg','.jpeg','.bmp','.tif','.tiff'}
for img_file in sorted(os.listdir(UCID_DIR)):
    if Path(img_file).suffix.lower() not in img_exts:
        continue
    stem = Path(img_file).stem

    # load original in color (OpenCV returns BGR channel order)
    orig = cv2.imread(str(UCID_DIR/img_file))
    if orig is None:
        print("☢️ failed to load", img_file)
        continue

    for atk, func in ATTACKS.items():
        # 1) attacked image; pass-through entries are selected by their None
        #    value, so renaming a key cannot silently break this branch
        attacked = orig.copy() if func is None else func(orig)

        # 2) rescale to every target size
        for size_key, (W, H) in SIZES.items():
            resized = cv2.resize(attacked, (W, H), interpolation=cv2.INTER_AREA)

            # 3a) uncompressed PNG; cv2.imwrite reports failure via its return
            #     value instead of raising, so check it explicitly
            png_path = OUT_ROOT/atk/f"{size_key}_{atk}_unc"/f"{stem}.png"
            if not cv2.imwrite(str(png_path), resized,
                               [cv2.IMWRITE_PNG_COMPRESSION, 0]):
                print("☢️ failed to write", png_path)

            # 3b) JPEG Q=90, written through PIL (which expects RGB order)
            jpg_path = OUT_ROOT/atk/f"{size_key}_{atk}_90"/f"{stem}.jpg"
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            Image.fromarray(rgb).save(str(jpg_path), 'JPEG', quality=90)

print("✅ Dataset preparation complete.")


✅ Dataset preparation complete.


# Working code: block-wise skewness and kurtosis over overlapping windows

In [None]:
import numpy as np
from scipy import stats
from skimage.util import view_as_windows

def cal_o_n_mf_ovrblk_moments(X, Xmf, win_sz, px_ol):
    """Per-block skewness and kurtosis of two images over overlapping windows.

    Both images are cut into ``win_sz`` x ``win_sz`` windows taken every
    ``win_sz - px_ol`` pixels. Skewness and kurtosis (scipy defaults, i.e.
    biased estimators and Fisher/excess kurtosis) are computed for every
    window. Only windows whose statistic is non-NaN in BOTH images are kept.

    Args:
        X: 2-D array, the reference image.
        Xmf: 2-D array with the same shape as ``X``, the processed image.
        win_sz: side length of the square window.
        px_ol: window overlap in pixels; must be 1 or 2. With 1 the images are
            first zero-padded to odd height and width.

    Returns:
        A 12-tuple:
        ``(skew_X, skew_Xmf, kurt_X, kurt_Xmf, n_valid_skew, n_valid_kurt,
        n_nan_skew_X, n_nan_skew_Xmf, n_nan_kurt_X, n_nan_kurt_Xmf,
        chk_skew, chk_kurt)``.
        ``chk_*`` is 1 when no window is valid in both images; in that case the
        two arrays are all-ones placeholders of length ``n_blocks`` and the
        matching ``n_valid_*`` is 0. Otherwise ``chk_*`` is 0.

    Raises:
        ValueError: if ``px_ol`` is not 1 or 2, if the resulting step is not
            positive, or if ``X`` and ``Xmf`` differ in shape.
    """
    # Validate up front: previously an unsupported px_ol left `step` unbound
    # and mismatched shapes were silently reshaped into misaligned blocks.
    if px_ol not in (1, 2):
        raise ValueError(f"px_ol must be 1 or 2, got {px_ol!r}")
    step = win_sz - px_ol
    if step < 1:
        raise ValueError(
            f"win_sz ({win_sz}) must be larger than px_ol ({px_ol})")
    if np.shape(X) != np.shape(Xmf):
        raise ValueError(
            f"X and Xmf must have the same shape, got {np.shape(X)} and {np.shape(Xmf)}")

    def pad_to_odd(im):
        # Append one zero row and/or column so both dimensions become odd.
        h, w = im.shape
        if h % 2 == 0:
            im = np.vstack([im, np.zeros((1, w))])
        if w % 2 == 0:
            im = np.hstack([im, np.zeros((im.shape[0], 1))])
        return im

    if px_ol == 1:
        # Padding is applied only for the 1-pixel overlap.
        X = pad_to_odd(X)
        Xmf = pad_to_odd(Xmf)

    # Overlapping blocks, flattened to one row of win_sz*win_sz pixels each.
    blocks_o = view_as_windows(X, (win_sz, win_sz), step)
    blocks_mf = view_as_windows(Xmf, (win_sz, win_sz), step)

    n_blocks = blocks_o.shape[0] * blocks_o.shape[1]
    blocks_o_flat = blocks_o.reshape(n_blocks, -1)
    blocks_mf_flat = blocks_mf.reshape(n_blocks, -1)

    # Skewness and kurtosis for all blocks at once.
    skew_o = stats.skew(blocks_o_flat, axis=1, nan_policy='omit')
    skew_mf = stats.skew(blocks_mf_flat, axis=1, nan_policy='omit')
    kurt_o = stats.kurtosis(blocks_o_flat, axis=1, nan_policy='omit')
    kurt_mf = stats.kurtosis(blocks_mf_flat, axis=1, nan_policy='omit')

    # Per-image NaN counts.
    n_nanskewo = np.isnan(skew_o).sum()
    n_nanskewmf = np.isnan(skew_mf).sum()
    n_nankurto = np.isnan(kurt_o).sum()
    n_nankurtmf = np.isnan(kurt_mf).sum()

    def _jointly_valid(stat_o, stat_mf):
        # Keep only blocks where the statistic is defined in both images.
        # If none exist (this includes "all NaN in either image"), return
        # placeholders and flag it; previously the partial-overlap case
        # returned empty arrays with flag 0, which callers cannot histogram.
        valid = ~np.isnan(stat_o) & ~np.isnan(stat_mf)
        if not valid.any():
            placeholder = np.ones(n_blocks)
            return placeholder, placeholder, 0, 1
        return stat_o[valid], stat_mf[valid], int(valid.sum()), 0

    (skeworemnan_ovblk, skewmfremnan_ovblk,
     n_ovblk_remnanskew, chk_skew) = _jointly_valid(skew_o, skew_mf)
    (kurtoremnan_ovblk, kurtmfremnan_ovblk,
     n_ovblk_remnankurt, chk_kurt) = _jointly_valid(kurt_o, kurt_mf)

    return (skeworemnan_ovblk, skewmfremnan_ovblk,
            kurtoremnan_ovblk, kurtmfremnan_ovblk,
            n_ovblk_remnanskew, n_ovblk_remnankurt,
            n_nanskewo, n_nanskewmf, n_nankurto, n_nankurtmf,
            chk_skew, chk_kurt)


# Feature extraction for all attacks

In [None]:
import os
import numpy as np
import cv2
from scipy import stats
from tqdm import tqdm
from pathlib import Path


# Dataset structure parameters
OUT_ROOT = Path('/content/drive/My Drive/ucid_dsets')  # Your dataset root
ATTACKS = ['mf3', 'mf5', 'avg', 'gau', 'res', 'jpeg', 'orig']
SIZES = ['512x384', '256x256']
VARIANTS = ['unc', '90']
UCID_FEAT_DIR = Path('/content/drive/My Drive/ucid_dsets/ucid_features')
UCID_FEAT_DIR.mkdir(parents=True, exist_ok=True)


# Algorithm parameters
win_sz = 3
px_ol = 2

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp'}

# Column order of the saved feature matrix; keep stable, the .npy files carry
# no column names.
FEATURE_COLUMNS = [
    'skl_4pk', 'skr_4pk', 'mean_sk', 'var_sk', 'kurt', 'mid_pk',
    'skr_1pk', 'skl_1pk', 'skr_2pk', 'skl_2pk', 'skr_3pk', 'skl_3pk',
    'n_nansk', 'ku_1pk', 'ku_2pk', 'ku_3pk', 'ku_4pk', 'mean_ku', 'var_ku',
]
COL = {name: idx for idx, name in enumerate(FEATURE_COLUMNS)}

# Skewness values whose histogram bin count is recorded as a feature.
SKEW_PEAKS = {
    'skr_4pk': 2.4748, 'skl_4pk': -2.4748,
    'mid_pk': 0.0000,
    'skr_1pk': 1.3363, 'skl_1pk': -1.3363,
    'skr_2pk': 0.7071, 'skl_2pk': -0.7071,
    'skr_3pk': 0.2236, 'skl_3pk': -0.2236,
}

# Kurtosis values whose histogram bin count is recorded as a feature. These are
# Pearson kurtosis values (normal distribution = 3); e.g. a 9-pixel window with
# a single outlier has Pearson kurtosis 7.125.
KURT_PEAKS = {'ku_1pk': 2.7857, 'ku_2pk': 1.5000, 'ku_3pk': 1.0500, 'ku_4pk': 7.1249}

# cal_o_n_mf_ovrblk_moments calls scipy.stats.kurtosis with its default
# fisher=True, i.e. excess kurtosis = Pearson kurtosis - 3. The peaks above are
# shifted by this offset before being looked up; without the shift 7.1249 lies
# outside the attainable range and the ku_* features never hit their bins.
PEARSON_TO_FISHER_OFFSET = 3.0


def _list_images(folder):
    """Return {stem: file name} for the image files directly inside `folder`."""
    return {
        Path(name).stem: name
        for name in os.listdir(folder)
        if os.path.splitext(name.lower())[1] in IMAGE_EXTENSIONS
    }


def _load_gray(image_path):
    """Read an image as a float grayscale array; return None if unreadable."""
    img = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        return None
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img.astype(float)


def _histogram(values, n_bins):
    """Histogram `values`; return (counts, bin centers, bin width)."""
    counts, edges = np.histogram(values, int(n_bins))
    centers = (edges[:-1] + edges[1:]) / 2
    # With a single bin there is no second center, so use the edges instead.
    width = centers[1] - centers[0] if len(centers) > 1 else edges[1] - edges[0]
    return counts, centers, width


def _count_at(value, centers, counts, width):
    """Count of the histogram bin containing `value` (0 if no bin does).

    Bin limits are truncated to 4 decimals before comparison. If `value` sits
    on a shared limit of two bins, the higher bin wins.
    """
    upper = np.fix((centers + width / 2) * (10**4)) / (10**4)
    lower = np.fix((centers - width / 2) * (10**4)) / (10**4)
    hits = np.flatnonzero((upper >= value) & (lower <= value))
    return counts[hits[-1]] if hits.size else 0


# Process each attack type, size, and variant
for attack in ATTACKS:
    for size in SIZES:
        for variant in VARIANTS:
            # NOTE(review): this combination is skipped as "non-existent", yet
            # the preparation cell does create a jpeg/unc folder - confirm.
            if attack == 'jpeg' and variant == 'unc':
                continue

            # Attacked images and their untouched counterparts
            attack_dir = OUT_ROOT / attack / f"{size}_{attack}_{variant}"
            original_dir = OUT_ROOT / 'orig' / f"{size}_orig_{'unc' if variant == 'unc' else '90'}"

            if not attack_dir.exists() or not original_dir.exists():
                print(f"Skipping {attack_dir} or {original_dir} - directory not found")
                continue

            print(f"Processing {attack} {size} {variant}...")

            # Pair files explicitly by stem (rather than trusting two
            # independently sorted lists to line up), ordered by attack name.
            attack_by_stem = _list_images(attack_dir)
            original_by_stem = _list_images(original_dir)
            common_stems = attack_by_stem.keys() & original_by_stem.keys()

            if not common_stems:
                print(f"No matching files found between {attack_dir} and {original_dir}")
                continue

            pairs = sorted((attack_by_stem[s], original_by_stem[s]) for s in common_stems)
            count = len(pairs)
            print(f"Found {count} matching files")

            # One row per image; rows of unreadable/degenerate images stay 0.
            features_raw = np.zeros((count, len(FEATURE_COLUMNS)))

            progress = tqdm(pairs, desc=f"Processing {attack} {size} {variant}")
            for t, (attack_name, original_name) in enumerate(progress):
                orig_path = original_dir / original_name
                orig_img = _load_gray(orig_path)
                if orig_img is None:
                    print(f"Failed to load original image: {orig_path}")
                    continue

                attack_path = attack_dir / attack_name
                attack_img = _load_gray(attack_path)
                if attack_img is None:
                    print(f"Failed to load attacked image: {attack_path}")
                    continue

                # Block-wise moments of the original and the attacked image
                (_skew_o, skew_mf, kurt_o, kurt_mf,
                 n_skew, n_kurt, _n_nan_sk_o, n_nanskewmf, _n_nan_ku_o,
                 _n_nan_ku_mf, chk_skew, chk_kurt) = cal_o_n_mf_ovrblk_moments(
                    orig_img, attack_img, win_sz, px_ol)

                row = features_raw[t]  # view: writes go into features_raw
                row[COL['n_nansk']] = n_nanskewmf

                # No usable blocks: leave every other feature at 0. The
                # n == 0 guard avoids log2(0) when nothing is jointly valid.
                if chk_skew != 0 or chk_kurt != 0 or n_skew == 0 or n_kurt == 0:
                    continue

                # Sturges' rule for the skewness histogram
                n_binskew = 1 + np.ceil(np.log2(n_skew))

                # Doane's rule for the kurtosis histogram:
                #   sigma = sqrt(6 (n - 2) / ((n + 1)(n + 3)))
                # The denominator must be parenthesised; without it the
                # expression evaluates 6 (n - 2) / (n + 1) * (n + 3).
                sigma = np.sqrt(6 * (n_kurt - 2) / ((n_kurt + 1) * (n_kurt + 3)))
                n_binkurt = 1 + np.ceil(
                    np.log2(n_kurt) + np.log2(1 + abs(stats.skew(kurt_o)) / sigma))
                if not np.isfinite(n_binkurt):
                    n_binkurt = n_binskew

                # Histograms of the attacked image's block statistics
                sk_counts, sk_centers, sk_width = _histogram(skew_mf, n_binskew)
                ku_counts, ku_centers, ku_width = _histogram(kurt_mf, n_binkurt)

                # Summary statistics
                row[COL['mean_sk']] = np.mean(skew_mf)
                row[COL['var_sk']] = np.var(skew_mf)
                row[COL['kurt']] = stats.kurtosis(skew_mf)
                row[COL['mean_ku']] = np.mean(kurt_mf)
                row[COL['var_ku']] = np.var(kurt_mf)

                # Histogram counts at the characteristic peak locations
                for name, peak in SKEW_PEAKS.items():
                    row[COL[name]] = _count_at(peak, sk_centers, sk_counts, sk_width)
                for name, peak in KURT_PEAKS.items():
                    row[COL[name]] = _count_at(
                        peak - PEARSON_TO_FISHER_OFFSET, ku_centers, ku_counts, ku_width)

            # Min-max normalise every non-constant column to [-1, 1];
            # constant columns keep their raw value.
            min_vals = np.min(features_raw, axis=0)
            max_vals = np.max(features_raw, axis=0)
            varying = max_vals != min_vals

            features_normalized = features_raw.copy()
            features_normalized[:, varying] = (
                (2 * (features_raw[:, varying] - min_vals[varying]))
                / (max_vals[varying] - min_vals[varying])
            ) - 1

            # Save results
            out_name = f"{size}_{attack}_{variant}.npy"
            out_path = UCID_FEAT_DIR / out_name
            np.save(out_path, features_normalized)

            print(f"Saved features for {out_path}")

print("✅ Feature extraction complete")

Processing mf3 512x384 unc...
Found 1338 matching files


  skew_o = stats.skew(blocks_o_flat, axis=1, nan_policy='omit')
  skew_mf = stats.skew(blocks_mf_flat, axis=1, nan_policy='omit')
  kurt_o = stats.kurtosis(blocks_o_flat, axis=1, nan_policy='omit')
  kurt_mf = stats.kurtosis(blocks_mf_flat, axis=1, nan_policy='omit')
Processing mf3 512x384 unc: 100%|██████████| 1338/1338 [10:45<00:00,  2.07it/s]


Saved features for 512x384_mf3_unc
Processing mf3 512x384 90...
Found 1338 matching files


Processing mf3 512x384 90: 100%|██████████| 1338/1338 [10:19<00:00,  2.16it/s]


Saved features for 512x384_mf3_90
Processing mf3 256x256 unc...
Found 1338 matching files


Processing mf3 256x256 unc: 100%|██████████| 1338/1338 [03:35<00:00,  6.20it/s]


Saved features for 256x256_mf3_unc
Processing mf3 256x256 90...
Found 1338 matching files


Processing mf3 256x256 90: 100%|██████████| 1338/1338 [03:30<00:00,  6.36it/s]


Saved features for 256x256_mf3_90
Processing mf5 512x384 unc...
Found 1338 matching files


Processing mf5 512x384 unc: 100%|██████████| 1338/1338 [10:40<00:00,  2.09it/s]


Saved features for 512x384_mf5_unc
Processing mf5 512x384 90...
Found 1338 matching files


Processing mf5 512x384 90: 100%|██████████| 1338/1338 [10:28<00:00,  2.13it/s]


Saved features for 512x384_mf5_90
Processing mf5 256x256 unc...
Found 1338 matching files


Processing mf5 256x256 unc:  53%|█████▎    | 709/1338 [02:06<01:33,  6.74it/s]

# Creating the MF35 dataset (half mf3 features, half mf5 features)

In [None]:
import numpy as np
import os
from pathlib import Path

def create_mf35_dataset(mf3_files, mf5_files, output_dir, seed=42):
    """
    Create mf35 feature files by mixing rows of paired mf3 and mf5 files.

    For every (mf3, mf5) pair, a random half of the rows (``n // 2``) is taken
    from the mf3 array and the remaining rows from the mf5 array; row positions
    are preserved. The result is saved in ``output_dir`` under the mf3 file's
    base name with 'mf3' replaced by 'mf35'. Pairs whose arrays differ in shape
    are reported and skipped.

    Args:
        mf3_files: List of paths to mf3 .npy files
        mf5_files: List of paths to mf5 .npy files, paired by position
        output_dir: Directory to save mf35 files (created if missing)
        seed: Seed for the row selection. The same seed is used for every
            pair, so arrays with equal row counts get the same split.

    Raises:
        ValueError: if the two lists differ in length (the pairing would be
            ambiguous; zip would silently drop the extra files).
    """
    mf3_files = list(mf3_files)
    mf5_files = list(mf5_files)
    if len(mf3_files) != len(mf5_files):
        raise ValueError(
            f"mf3_files and mf5_files must have the same length, "
            f"got {len(mf3_files)} and {len(mf5_files)}")

    Path(output_dir).mkdir(parents=True, exist_ok=True)

    for mf3_file, mf5_file in zip(mf3_files, mf5_files):
        mf3_basename = os.path.basename(mf3_file)
        # Get the corresponding mf35 filename
        mf35_basename = mf3_basename.replace('mf3', 'mf35')
        mf35_filepath = os.path.join(output_dir, mf35_basename)

        print(f"Processing {mf3_basename} and {os.path.basename(mf5_file)}...")

        # Load data from both files
        mf3_data = np.load(mf3_file)
        mf5_data = np.load(mf5_file)

        # Check dimensions
        if mf3_data.shape != mf5_data.shape:
            print(f"Warning: Shape mismatch between {mf3_file} ({mf3_data.shape}) and {mf5_file} ({mf5_data.shape})")
            continue

        num_images = mf3_data.shape[0]

        # A private legacy RandomState yields the same draw as
        # np.random.seed(seed) + np.random.choice(...) did, but leaves the
        # caller's global NumPy random state untouched.
        rng = np.random.RandomState(seed)
        mf3_indices = rng.choice(num_images, num_images // 2, replace=False)

        # Every row not chosen for mf3 comes from mf5
        from_mf5 = np.ones(num_images, dtype=bool)
        from_mf5[mf3_indices] = False

        mf35_data = mf3_data.copy()
        mf35_data[from_mf5] = mf5_data[from_mf5]

        # Save the new dataset
        np.save(mf35_filepath, mf35_data)
        print(f"Created {mf35_filepath} with shape {mf35_data.shape}")

def main():
    """Build the mf35 feature files for every size/variant combination.

    Looks up the mf3 and mf5 feature files in the feature directory, warns
    about any that are missing, and passes only complete (mf3, mf5) pairs to
    create_mf35_dataset. Output goes to the 'mf35' sub-folder.
    """
    # Same lower-case '/content/drive' root the feature-extraction cell writes
    # to (UCID_FEAT_DIR); paths are case-sensitive.
    base_dir = "/content/drive/My Drive/ucid_dsets/ucid_features/"

    combos = ["512x384_{}_unc.npy", "512x384_{}_90.npy",
              "256x256_{}_unc.npy", "256x256_{}_90.npy"]
    mf3_files = [os.path.join(base_dir, combo.format("mf3")) for combo in combos]
    mf5_files = [os.path.join(base_dir, combo.format("mf5")) for combo in combos]

    # Keep only pairs where both files exist, so one missing file is reported
    # instead of aborting the whole run inside np.load.
    complete_pairs = []
    for mf3_file, mf5_file in zip(mf3_files, mf5_files):
        missing = [f for f in (mf3_file, mf5_file) if not os.path.exists(f)]
        for file in missing:
            print(f"Warning: File {file} does not exist")
        if not missing:
            complete_pairs.append((mf3_file, mf5_file))

    if not complete_pairs:
        print("No complete mf3/mf5 file pairs found - nothing to do")
        return

    output_dir = os.path.join(base_dir, "mf35")

    # Process and create mf35 dataset
    create_mf35_dataset(
        [mf3_file for mf3_file, _ in complete_pairs],
        [mf5_file for _, mf5_file in complete_pairs],
        output_dir,
    )

    print("mf35 dataset creation complete!")

if __name__ == "__main__":
    main()

Processing 512x384_mf3_unc.npy and 512x384_mf5_unc.npy...
Created /content/Drive/My Drive/ucid_dsets/ucid_features/mf35/512x384_mf35_unc.npy with shape (1338, 19)
Processing 512x384_mf3_90.npy and 512x384_mf5_90.npy...
Created /content/Drive/My Drive/ucid_dsets/ucid_features/mf35/512x384_mf35_90.npy with shape (1338, 19)
Processing 256x256_mf3_unc.npy and 256x256_mf5_unc.npy...
Created /content/Drive/My Drive/ucid_dsets/ucid_features/mf35/256x256_mf35_unc.npy with shape (1338, 19)
Processing 256x256_mf3_90.npy and 256x256_mf5_90.npy...
Created /content/Drive/My Drive/ucid_dsets/ucid_features/mf35/256x256_mf35_90.npy with shape (1338, 19)
mf35 dataset creation complete!
