In [None]:
!pip install tensorflow google-cloud-storage

In [None]:
from google.cloud import storage
from IPython.display import Image, display
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from dotenv import load_dotenv
import tensorflow as tf
import pandas as pd
from io import StringIO

In [None]:
# Load CSV data

# pandas reads gs:// URLs through gcsfs, which must be installed and able to authenticate
csv_path = 'gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/data.csv'

# Use pandas to directly read from GCS
df = pd.read_csv(csv_path)

# Drop only the unnamed index column (present when the CSV was saved with its index).
# The column is removed by name so that nothing else is dropped by accident; in
# particular 'path' must be kept because the filename -> emotion mapping below
# is built from it.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

# Show the first few rows to verify
print(df.head())

# Create a dictionary mapping filenames (last component of each path) to emotions
filenames = df['path'].apply(lambda p: p.split('/')[-1])
emotion_dict = pd.Series(df['label'].values, index=filenames).to_dict()

In [None]:
# Define global variables

load_dotenv()  # Load the environment variables from the .env file

GOOGLE_KEY_PATH = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
GOOGLE_BUCKET_NAME = os.getenv('GOOGLE_STORAGE_BUCKET')

# Expose the key path through GOOGLE_APPLICATION_CREDENTIALS for the Google client.
# os.environ only accepts strings, so the assignment is skipped when the variable
# is unset; the client then has to rely on whatever default credentials the
# environment provides.
if GOOGLE_KEY_PATH:
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_KEY_PATH

# Initialize the Google Cloud Storage client
client = storage.Client()

In [None]:
def match_image_to_category_label(blob, emotion_dict):
    """Download a blob, decode it as an RGB image and look up its emotion label.

    Args:
        blob: Object exposing ``download_as_bytes()`` and a ``name`` attribute
            (the blob's path inside the bucket).
        emotion_dict: Mapping from bare filename (last component of the blob
            name) to emotion label.

    Returns:
        Tuple ``(img, emotion)``: the decoded image converted from BGR to RGB,
        and the label for the file, or ``"Unknown"`` when the filename is not
        a key of ``emotion_dict``.

    Raises:
        ValueError: If the blob is empty or its bytes cannot be decoded as an
            image (for example a CSV file or a folder placeholder).
    """
    image_data = blob.download_as_bytes()
    if not image_data:
        raise ValueError(f"Blob {blob.name!r} is empty; cannot decode an image from it.")

    image_array = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning no image rather than raising, so check
    # here to report which blob was the problem instead of failing inside cvtColor.
    if img is None:
        raise ValueError(f"Blob {blob.name!r} could not be decoded as an image.")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # The label mapping is keyed by bare filename, so strip the directory part
    filename = blob.name.split('/')[-1]
    emotion = emotion_dict.get(filename, "Unknown")

    return img, emotion

In [None]:
# Fail early with a clear message instead of an opaque error from the storage client
if not GOOGLE_BUCKET_NAME:
    raise RuntimeError(
        "GOOGLE_STORAGE_BUCKET is not set; define it in the environment or the .env file."
    )

bucket = client.bucket(GOOGLE_BUCKET_NAME)

# Prefix (folder) inside the bucket that holds the dataset
blob_path = 'DataSets/FacialEmotionRecognitionImageDataset_v1/'

# Get the blobs under the prefix, skipping:
#   * anything whose name contains 'Ahegao' (category excluded from this analysis)
#   * folder placeholder objects (names ending in '/') and CSV files such as the
#     label file, none of which can be decoded as images
blobs = [
    blob
    for blob in bucket.list_blobs(prefix=blob_path)
    if 'Ahegao' not in blob.name
    and not blob.name.lower().endswith(('/', '.csv'))
]

In [None]:
# Function to read image from GCS
def read_image_from_gcs(blob):
    """Download a blob from Google Cloud Storage and decode it into an RGB array.

    Args:
        blob: Object exposing ``download_as_bytes()`` and a ``name`` attribute.

    Returns:
        The decoded image as a numpy array, converted from BGR to RGB.

    Raises:
        ValueError: If the blob is empty or its bytes cannot be decoded as an
            image.
    """
    image_data = blob.download_as_bytes()
    if not image_data:
        raise ValueError(f"Blob {blob.name!r} is empty; cannot decode an image from it.")

    image_array = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning no image rather than raising, so check
    # here to report which blob was the problem instead of failing inside cvtColor.
    if img is None:
        raise ValueError(f"Blob {blob.name!r} could not be decoded as an image.")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Print the file count and show a random sample of images from the dataset
print(f"File count: {len(blobs)}")

# Preview grid layout: num_images rows by num_samples columns
num_images = 3
num_samples = 3
grid_size = num_images * num_samples

if not blobs:
    # np.random.choice raises on an empty population, so report it instead
    print("No blobs available to display.")
else:
    # Draw all indices in one call and without replacement: the same image cannot
    # appear twice in the grid, and the list of blobs is not re-converted to an
    # array for every single draw. Fewer images are shown if the list is short.
    sample_count = min(grid_size, len(blobs))
    sample_indices = np.random.choice(len(blobs), size=sample_count, replace=False)

    plt.figure(figsize=(13, 13))
    for position, blob_index in enumerate(sample_indices, start=1):
        img, emotion_label = match_image_to_category_label(blobs[blob_index], emotion_dict)

        plt.subplot(num_images, num_samples, position)
        plt.imshow(img)
        plt.axis('off')
        plt.title(emotion_label)  # Display the emotion label as the title

    plt.show()

In [None]:
# The commented-out code below lists the dataset files directly in the Google Cloud Storage bucket
# # Use tf.io.gfile.glob to list all JPEG files in the dataset's GCS directory
# filenames = tf.io.gfile.glob('gs://storage_for_all/DataSets/FacialEmotionRecognitionImageDataset_v1/*.jpg')

# # Calculate the total number of image files
# total_images = len(filenames)

# # Print the total number of image files
# print("Total number of image files:", total_images)