<a href="https://colab.research.google.com/github/uni-bar/computational-neuro/blob/main/larva.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from IPython.display import clear_output as clear

#### Prerequisites

In [15]:
import os
import requests
import random

# --- Download configuration ---
AMOUNT = 50  # default cap on how many images are sampled per class folder
REPO_OWNER = "uni-bar"
REPO_NAME = "computational-neuro"
BRANCH = "main"
FOLDERS = ["larva", "no_larva"]  # class sub-folders under dataset/ in the repository
# Positional placeholders, in order: owner, repository, branch, class folder, file name.
BASE_RAW_URL = "https://raw.githubusercontent.com/{}/{}/{}/dataset/{}/{}"

# Make sure a local target directory exists for each class before downloading.
for _class_dir in ("dataset/larva", "dataset/no_larva"):
    os.makedirs(_class_dir, exist_ok=True)

def get_image_list(folder, timeout=30):
    """Fetch the raw-download URLs of the images in a repository folder.

    The GitHub tree page of ``dataset/<folder>`` is requested and every
    anchor whose ``href`` ends in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) is converted into a ``raw.githubusercontent.com``
    URL built from ``BASE_RAW_URL``.

    Args:
        folder: Name of the sub-folder under ``dataset/`` in the repository.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the notebook forever.

    Returns:
        A list of unique raw image URLs in the order they appear on the
        page, or an empty list when the page could not be fetched.
    """
    url = f"https://github.com/{REPO_OWNER}/{REPO_NAME}/tree/{BRANCH}/dataset/{folder}"
    print(f"Fetching image list from {url}")
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat connection problems like a bad status: report and return nothing.
        print(f"Error fetching {folder}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Error fetching {folder}: {response.status_code}")
        return []

    # Imported lazily so bs4 is only required once a page was actually fetched.
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    image_urls = []
    seen = set()
    for link in soup.find_all('a', href=True):
        href = link['href']
        if not href.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_name = href.split('/')[-1]
        raw_url = BASE_RAW_URL.format(REPO_OWNER, REPO_NAME, BRANCH, folder, image_name)
        # If the page links the same file more than once, keep only one entry
        # so that random sampling downstream cannot pick the same image twice.
        if raw_url not in seen:
            seen.add(raw_url)
            image_urls.append(raw_url)
    return image_urls

def download_images(image_urls, folder, amount=AMOUNT, timeout=30):
    """Download a random sample of images into ``dataset/<folder>``.

    Args:
        image_urls: Sequence of image URLs to sample from.
        folder: Sub-folder of the local ``dataset`` directory to write into.
        amount: Maximum number of images to download; capped at the number
            of available URLs.
        timeout: Seconds to wait for each request before giving up.

    Failed downloads (connection error or non-200 status) are reported and
    skipped so that one bad URL does not abort the rest of the batch.
    """
    target_dir = os.path.join("dataset", folder)
    # Create the target directory here so the function does not rely on it
    # having been created elsewhere.
    os.makedirs(target_dir, exist_ok=True)

    selected_images = random.sample(image_urls, min(amount, len(image_urls)))
    for img_url in selected_images:
        img_name = os.path.join(target_dir, os.path.basename(img_url))
        print(f"Downloading {img_url} to {img_name}")
        try:
            img_response = requests.get(img_url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to download {img_url}: {exc}")
            continue
        if img_response.status_code == 200:
            with open(img_name, 'wb') as img_file:
                img_file.write(img_response.content)
        else:
            print(f"Failed to download {img_url}: {img_response.status_code}")

# Fetch the image listing of every class folder and download a sample from it.
for folder in FOLDERS:
    images = get_image_list(folder)
    if not images:
        # Nothing was listed for this folder, so there is nothing to download.
        continue
    print(f"Downloading images from {folder}...")
    download_images(images, folder)

print("Download complete.")


Fetching image list from https://github.com/uni-bar/computational-neuro/tree/main/dataset/larva
Downloading images from larva...
Downloading https://raw.githubusercontent.com/uni-bar/computational-neuro/main/dataset/larva/back_strike_frame2_2023-05-05_14-32-39-077.png to dataset/larva/back_strike_frame2_2023-05-05_14-32-39-077.png
Downloading https://raw.githubusercontent.com/uni-bar/computational-neuro/main/dataset/larva/back_strike_frame2_2023-05-05_06-34-57-875.png to dataset/larva/back_strike_frame2_2023-05-05_06-34-57-875.png
Downloading https://raw.githubusercontent.com/uni-bar/computational-neuro/main/dataset/larva/back_strike_frame2_2023-05-04_07-43-09-180.png to dataset/larva/back_strike_frame2_2023-05-04_07-43-09-180.png
Downloading https://raw.githubusercontent.com/uni-bar/computational-neuro/main/dataset/larva/back_strike_frame2_2023-05-14_12-31-45-153.png to dataset/larva/back_strike_frame2_2023-05-14_12-31-45-153.png
Downloading https://raw.githubusercontent.com/uni-bar/c


#### Load the larva/no larva images

In [None]:
!pip install datasets torch torchvision tqdm matplotlib
clear()


In [7]:
from datasets import load_dataset
from collections import Counter
import pandas as pd



In [8]:
import torch.nn as nn
import torch.nn.functional as F
# from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, Lambda, Grayscale, ToPILImage


In [None]:
# Side length in pixels passed to get_transform, which resizes images to image_size x image_size.
image_size = 512 # TODO: move to config


def get_transform(image_size):
  """Build the preprocessing pipeline applied to every image.

  The pipeline resizes to ``image_size`` x ``image_size``, converts the
  image to a tensor and normalizes it with mean 0.5 and std 0.5.
  """
  steps = [
      Resize((image_size, image_size)),
      ToTensor(),
      Normalize(mean=[0.5], std=[0.5]),
  ]
  return Compose(steps)

def preprocess(sample):
  """Replace ``sample['image']`` with its transformed version and return the sample.

  The sample is modified in place; the same object is returned.
  """
  transform = get_transform(image_size)
  sample['image'] = transform(sample['image'])
  return sample

# Load the dataset from the Hub or from the locally downloaded image folders.
def load_larva(is_hugging=False, dataset_path="dataset"):
    """Load the larva / no-larva image dataset, print a summary and return it.

    Args:
        is_hugging: When true, load ``Bareket/larva`` with ``load_dataset``
            and apply ``preprocess`` to every sample. Otherwise read the
            images from ``dataset_path`` with ``ImageFolder``.
        dataset_path: Local directory that holds one sub-directory per class
            (only used when ``is_hugging`` is false).

    Returns:
        The loaded dataset object, so that callers can keep working with it.
    """
    if is_hugging:
        dataset = load_dataset("Bareket/larva")
        dataset = dataset.map(preprocess, remove_columns=["image"])
    else:
        # ImageFolder reads from the local filesystem, so it needs the
        # directory containing the class sub-folders, not a GitHub web URL.
        dataset = ImageFolder(dataset_path, transform=get_transform(image_size))

    # Inspect the dataset.
    print(dataset)
    # load_dataset without a split returns a dict-like DatasetDict (split name
    # -> dataset) that cannot be indexed by position; peek into its first split.
    sample_source = dataset
    if isinstance(dataset, dict) and dataset:
        sample_source = next(iter(dataset.values()))
    print("First sample:", sample_source[0])

    return dataset



#### Display the features of the data

In [None]:
# Show the feature schema reported by the dataset.
print("Dataset Features:")
print(dataset.features)
print("\n")


# Report the label names when the dataset declares a "label" feature.

if "label" not in dataset.features:
    print("No 'label' field found in the dataset features.")
else:
    label_names = dataset.features["label"].names
    print("Label names:", label_names)



#### Count the number of images for each label


In [None]:
# Tally how often each value of the "label" column occurs.
labels = dataset["label"]
label_counts = Counter(labels)
print("\nLabel counts (using Counter):")

for label_id, count in label_counts.items():
    # Show the label's name when the dataset declares a "label" feature,
    # and fall back to the raw label value otherwise.
    if "label" in dataset.features:
        folder_name = label_names[label_id]
    else:
        folder_name = str(label_id)
    print(f"{folder_name}: {count}")

