In [1]:
import os

base_dir = "/Users/user/Downloads/DLF/s2/60m"
train_files = [line.strip() for line in open("/Users/user/Downloads/DLF/train_filenames.lst")]

missing_files = []
valid_files = []

for filename in train_files:
    img_path = os.path.join(base_dir, filename)
    if not os.path.exists(img_path):
        missing_files.append(filename)
    else:
        valid_files.append(filename)

# Save missing files for review
with open("missing_files.txt", "w") as f:
    f.write("\n".join(missing_files))

print(f"Total missing files: {len(missing_files)}")
print(f"Total valid files: {len(valid_files)}")

Total missing files: 0
Total valid files: 45337


In [14]:
import numpy as np
import rasterio
from PIL import Image

def normalize_bands(img):
    """Normalize each band using Min-Max scaling (0 to 1)."""
    img_min = img.min(axis=(1, 2), keepdims=True)
    img_max = img.max(axis=(1, 2), keepdims=True)
    return (img - img_min) / (img_max - img_min + 1e-6)  # Avoid division by zero

# Example: Read and normalize one image
img_path = os.path.join(base_dir, valid_files[0])  # Pick a valid file
with rasterio.open(img_path) as src:
    img = src.read()
    img = img.astype(np.float32)
    img_norm = normalize_bands(img)

print(f"Original band range: {img.min()} to {img.max()}")
print(f"Normalized band range: {img_norm.min()} to {img_norm.max()}")



Original band range: 61.0 to 2444.0
Normalized band range: 0.0 to 1.0


In [25]:
import numpy as np
import matplotlib.pyplot as plt

def visualize_rgb_comparison(img, img_norm, bands=(5, 2, 1)):
    """Compare unnormalized vs. normalized images side by side."""
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))

    # Original Image
    img_rgb = np.stack([img[b] for b in bands], axis=-1)
    axes[0].imshow(np.clip(img_rgb, 0, 1))
    axes[0].set_title("Original Image")
    axes[0].axis("off")

    # Normalized Image
    img_rgb_norm = np.stack([img_norm[b] for b in bands], axis=-1)
    axes[1].imshow(np.clip(img_rgb_norm, 0, 1))
    axes[1].set_title("Normalized Image")
    axes[1].axis("off")

    plt.tight_layout()
    plt.show()

# Compare original vs. normalized
print(img, img_norm)


[[[  97.   93.   86.   65.   90.  101.]
  [  93.  104.   91.   61.   85.   93.]
  [ 124.  108.  104.   65.   84.   83.]
  [ 114.   82.   93.   91.   93.   75.]
  [  79.   77.   91.   93.  106.   93.]
  [ 129.  108.  119.  132.  141.  119.]]

 [[ 176.  199.  199.  189.  220.  220.]
  [ 222.  212.  186.  187.  214.  207.]
  [ 268.  249.  204.  185.  173.  147.]
  [ 233.  224.  203.  194.  191.  187.]
  [ 172.  172.  197.  207.  216.  226.]
  [ 246.  263.  301.  311.  313.  309.]]

 [[  89.   79.   87.   73.  104.   98.]
  [ 100.   92.  109.   87.  118.   97.]
  [ 168.  145.  127.   90.  104.   90.]
  [ 149.  123.  130.  126.  147.  126.]
  [ 133.   88.   99.  120.  147.  131.]
  [ 159.  141.  178.  183.  197.  169.]]

 [[1536. 1585. 1605. 1581. 1696. 1721.]
  [1692. 1715. 1648. 1622. 1669. 1535.]
  [1875. 1832. 1610. 1349. 1278. 1242.]
  [1379. 1385. 1336. 1407. 1371. 1391.]
  [1352. 1565. 1718. 1804. 1800. 1924.]
  [1688. 1927. 2238. 2378. 2292. 2246.]]

 [[ 330.  314.  307.  311.  317.

In [18]:
print("Min:", img.min(), "Max:", img.max())


Min: 61.0 Max: 2444.0


In [19]:
print("Original Image: min =", img.min(), "max =", img.max())
print("Normalized Image: min =", img_norm.min(), "max =", img_norm.max())


Original Image: min = 61.0 max = 2444.0
Normalized Image: min = 0.0 max = 1.0
