<a href="https://colab.research.google.com/github/vjhawar12/FreshNET/blob/main/FreshNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

In [None]:
# Authenticate this runtime with Google Cloud. The command prints a sign-in URL
# and then waits for the verification code obtained in the browser.
!gcloud auth application-default login

Go to the following link in your browser, and complete the sign-in prompts:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fapplicationdefaultauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=2hDyy1VYRxMwp31IG3oT9g4QqA3lzh&prompt=consent&token_usage=remote&access_type=offline&code_challenge=9qVHTsGss38khnJHTxlMRpz1Fa5dPabujUwwlp7ZfqE&code_challenge_method=S256

Once finished, enter the verification code provided in your browser: 

In [None]:
# Select the Google Cloud project that subsequent gcloud commands operate on.
!gcloud config set project freshnet-466505

In [None]:
# Download the image bucket into /content/dataset.
# The directory is created first (it does not exist on a fresh runtime) and is
# passed to gcloud as the explicit destination instead of relying on `cd`,
# which in this notebook environment can receive the rest of the line as its path.
!mkdir -p /content/dataset && gcloud storage cp --recursive gs://fruit-images-freshnet /content/dataset

In [None]:
# List the downloaded files. The path is given to `ls` directly: the
# `cd <dir> && ls` form failed here with
# "No such file or directory: '/content/dataset && ls'".
!ls /content/dataset

[Errno 2] No such file or directory: '/content/dataset && ls'
/content


In [None]:
# Dataset root directories (still empty placeholders in this notebook).
path_to_train_imgs = ""
path_to_test_imgs = ""

# Loader / reproducibility settings.
BATCH_SIZE = 256
RANDOM_SEED = 42

# Fractions of the training images used for training and for validation.
TRAIN_SIZE = 0.8
VAL_SIZE = 0.2

# Original images are ~400*400 px, so resizing them to 320 retains detail
# while reducing computational cost.
IMG_SIZE = 320

In [None]:
# Preprocessing pipelines applied to every image when it is loaded.
#
# Two ordering/argument details matter here:
# - Resize takes the target size as ONE (height, width) tuple; a second
#   positional argument would be interpreted as the interpolation mode.
# - ToTensor must come before Normalize, because Normalize works on tensors
#   (ToTensor turns the PIL image into a float tensor scaled to [0, 1]).
# The mean/std values are the standard ImageNet channel statistics.
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    # additional preprocessing
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Build the datasets from class-per-folder image directories.
full_train_data = ImageFolder(path_to_train_imgs, transform=train_transform)
test_data = ImageFolder(path_to_test_imgs, transform=test_transform)

# random_split expects a torch.Generator (not a bare integer seed) to make the
# train/validation split reproducible.
split_generator = torch.Generator().manual_seed(RANDOM_SEED)
train_data, val_data = random_split(full_train_data, [TRAIN_SIZE, VAL_SIZE], generator=split_generator)

# train_data is a Subset, which has no class_to_idx of its own; the mapping
# lives on the wrapped ImageFolder, reachable through `.dataset`.
print(f"{full_train_data.class_to_idx} \n {train_data.dataset.class_to_idx} \n {test_data.class_to_idx}")

In [None]:
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=10,
)

# Evaluation loaders keep a fixed sample order.
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
val_dataloader = DataLoader(
    val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [None]:
class InvertedResidual(nn.Module):
  """Expansion -> depthwise -> pointwise convolution block.

  The block widens the input with a 1*1 convolution, filters each channel
  spatially with a depthwise convolution, then projects to `out_channels`
  with another 1*1 convolution followed by ReLU6.

  Args:
    in_channels: number of channels of the input feature map.
    out_channels: number of channels produced by the block.
    exp_factor: multiplier applied to `in_channels` to get the width of the
      expanded (hidden) representation.
    downsample_factor: stride of the depthwise convolution; 1 keeps the
      spatial size, 2 halves it (for even sizes and odd kernels).
    kernel_size: kernel size of the depthwise convolution (odd values keep
      the spatial size exactly at stride 1).

  A skip connection (output + input) is used whenever the input and output
  shapes match, i.e. `downsample_factor == 1` and
  `in_channels == out_channels`.
  """

  def __init__(self, in_channels, out_channels, exp_factor, downsample_factor, kernel_size=3):
    super().__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.exp_factor = exp_factor
    self.downsample_factor = downsample_factor
    self.kernel_size = kernel_size

    expanded_channels = self.in_channels * self.exp_factor

    # The residual add is only shape-compatible when nothing is resized.
    self.use_skip = self.downsample_factor == 1 and self.in_channels == self.out_channels

    self.block = nn.Sequential(
        nn.Conv2d(in_channels=self.in_channels, out_channels=expanded_channels, kernel_size=1, stride=1, groups=1), # expansion
        # padding=kernel_size // 2 keeps the spatial size at stride 1; without
        # it every block would trim (kernel_size - 1) pixels from each dimension.
        nn.Conv2d(
            in_channels=expanded_channels,
            out_channels=expanded_channels,
            kernel_size=self.kernel_size,
            stride=self.downsample_factor,
            padding=self.kernel_size // 2,
            groups=expanded_channels,
        ), # depthwise
        nn.Conv2d(in_channels=expanded_channels, out_channels=self.out_channels, kernel_size=1, stride=1, groups=1), # pointwise
        nn.ReLU6(), # output ∈ [0, 6]
    )

  def forward(self, x):
    """Apply the block to `x`, adding `x` back when the skip connection is active."""
    out = self.block(x)
    if self.use_skip:
      out = out + x
    return out

In [None]:
class FreshNET_CNN(nn.Module):

  """
  FreshNET applies the concept of depthwise and pointwise convolutions as introduced in the MobileNet papers. The key idea is to decouple spatial filtering and channel mixing,
  which a standard convolution (like a 3*3*3 kernel) handles together, into two more efficient operations.

  This separation is implemented in the InvertedResidual class. Each instance consists of:
  - A pointwise 1*1 convolution for channel expansion, increasing feature dimensionality.
  - A depthwise 3*3 convolution applied independently per channel for spatial filtering.
  - Another pointwise convolution to project back to a lower-dimensional space.

  This design significantly reduces computational cost while maintaining high representational power. There are 10 layers, of which 7 are InvertedResidual layers.
  Each InvertedResidual layer consists of 1-4 InvertedResidual instances.

  To mitigate potential accuracy trade-offs, the model is intended to be paired with techniques such as skip connections and strong data augmentation, which help
  generalization and performance on validation data.

  Input: a batch of 3-channel images (designed for 320*320). Output: 2 logits per image.
  The shape comments below assume every InvertedResidual keeps the spatial size at stride 1 and halves it at stride 2.
  """


  def __init__(self):
    super().__init__()

    # silu is good for vanishing gradients and can be applied in the later layers where ReLU might not be as effective (dead neurons)
    silu = nn.SiLU()

    # dropout regularization
    dropout_aggressive = nn.Dropout(0.3)
    dropout_mild = nn.Dropout(0.1)

    # initial regular convolution; padding=1 is required for the 3*3, stride-2 kernel to map 320 --> 160 (without it the result is 159)
    initial_conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=1) # 320*320*3 --> 160*160*32

    # inverted residual block 1: 160*160*32 --> 160*160*16, channel expansion = 1
    block_1 = nn.Sequential(
        InvertedResidual(32, 16, 1, 1)
    )

    # inverted residual block 2: 160*160*16 --> 80*80*24, channel expansion = 6
    block_2 = nn.Sequential(
        InvertedResidual(16, 24, 6, 2),
        InvertedResidual(24, 24, 6, 1)
    )

    # inverted residual block 3: 80*80*24 --> 40*40*32, channel expansion = 6
    block_3 = nn.Sequential(
        InvertedResidual(24, 32, 6, 2),
        InvertedResidual(32, 32, 6, 1),
        InvertedResidual(32, 32, 6, 1),
    )

    # inverted residual block 4: 40*40*32 --> 20*20*64, channel expansion = 6
    block_4 = nn.Sequential(
        InvertedResidual(32, 64, 6, 2),
        InvertedResidual(64, 64, 6, 1),
        InvertedResidual(64, 64, 6, 1),
        InvertedResidual(64, 64, 6, 1),
    )

    # inverted residual block 5: 20*20*64 --> 20*20*96, channel expansion = 6
    block_5 = nn.Sequential(
        InvertedResidual(64, 96, 6, 1),
        InvertedResidual(96, 96, 6, 1),
        InvertedResidual(96, 96, 6, 1),
    )

    # inverted residual block 6: 20*20*96 --> 10*10*160, channel expansion = 6
    block_6 = nn.Sequential(
        InvertedResidual(96, 160, 6, 2),
        InvertedResidual(160, 160, 6, 1),
        InvertedResidual(160, 160, 6, 1),
    )

    # inverted residual block 7: 10*10*160 --> 10*10*320, channel expansion = 6
    block_7 = nn.Sequential(
        InvertedResidual(160, 320, 6, 1)
    )

    # final regular convolution
    final_conv = nn.Conv2d(in_channels=320, out_channels=1280, kernel_size=1, stride=1) # 10*10*320 --> 10*10*1280

    # MaxPooling is often preferred for highlighting dominant features, but it can be too aggressive in lightweight models
    # like MobileNet or EfficientNet, which already have low spatial resolution. It can discard valuable spatial
    # information. Average Pooling computes the average of nearby pixels, preserving more context. This makes it more
    # suitable for compact architectures like this one.
    #
    # AdaptiveAvgPool2d(1) averages over the whole feature map: identical to AvgPool2d(10) on a 10*10 map,
    # but it also works when the input resolution (and therefore the final map size) differs.
    avg_pool = nn.AdaptiveAvgPool2d(1) # 10*10*1280 --> 1*1*1280

    # nn.Linear acts on the last dimension, so the (N, 1280, 1, 1) pooled tensor must be flattened to (N, 1280) first
    flatten = nn.Flatten()

    # fully connected layers
    fc_1 = nn.Linear(in_features=1280, out_features=500)
    fc_2 = nn.Linear(in_features=500, out_features=2)

    # layer order
    self.layers = nn.ModuleList(
        [
            initial_conv,

            block_1,
            block_2,
            block_3,
            block_4,

            dropout_mild,

            block_5,
            block_6,
            block_7,

            final_conv,

            avg_pool, silu, # ReLU in later layers can lead to problems, so I used a different activation function here

            flatten,

            fc_1, silu,

            dropout_aggressive,

            fc_2
        ]
    )

  def forward(self, x):
    """Run `x` through every layer in `self.layers` in order and return the result."""
    for layer in self.layers:
      x = layer(x)

    return x

In [None]:
# Build the FreshNET model instance used by the rest of the notebook.
cnn = FreshNET_CNN()

In [None]:
def train():
  """Placeholder for the training loop; does nothing yet."""
  return None

In [None]:
def validate():
  """Placeholder for the validation pass; does nothing yet."""
  return None

In [None]:
def test():
  """Placeholder for the test-set evaluation; does nothing yet."""
  return None