In [27]:
import matplotlib.pyplot as plt # for plotting
import numpy as np # for transformation
import pandas as pd
from PIL import Image
import os
import shutil
from os.path import exists

import torch # PyTorch package
import torchvision # load datasets
import torchvision.transforms as transforms # transform data
import torch.nn as nn # basic building block for neural neteorks
import torch.nn.functional as F # import convolution functions like Relu
import torch.optim as optim # optimzer

In [28]:
# Read the JSON file into a pandas DataFrame
train_data = pd.read_json("train_data.json")

# Keep only the rows that have a sentiment value (games without reviews are dropped)
train_df = train_data.dropna(subset=["sentiment"])

# Expand the "screenshots" column so every entry gets its own row, with a fresh 0..n-1 index
train_df_expanded = train_df.explode(column="screenshots", ignore_index=True)


In [29]:
# Display the exploded DataFrame (a bare last expression is rendered by the notebook)
train_df_expanded

Unnamed: 0,appid,release,title,price,sentiment,reviews,percentage,tags,screenshots
0,2460480,2023-07-19,KILL CRAB,0,Very Positive,77.0,81.0,"[3D, Action, Action-Adventure, Adventure, Atmo...",2460480_ss_7d0cc1ba5160475a863d3cab18ae20c5319...
1,2460480,2023-07-19,KILL CRAB,0,Very Positive,77.0,81.0,"[3D, Action, Action-Adventure, Adventure, Atmo...",2460480_ss_ed40cec9267023b99fb3bf571a5c74556bb...
2,2460480,2023-07-19,KILL CRAB,0,Very Positive,77.0,81.0,"[3D, Action, Action-Adventure, Adventure, Atmo...",2460480_ss_ef88923d635e37c1a63ff2658f11a0ed489...
3,2460480,2023-07-19,KILL CRAB,0,Very Positive,77.0,81.0,"[3D, Action, Action-Adventure, Adventure, Atmo...",2460480_ss_f46c745c81786d4a8aa18187f2d942aff7a...
4,2460480,2023-07-19,KILL CRAB,0,Very Positive,77.0,81.0,"[3D, Action, Action-Adventure, Adventure, Atmo...",2460480_ss_f8008bc1bc867afb00beb2e8844d6386e32...
...,...,...,...,...,...,...,...,...,...
41786,2684300,2024-01-04,Just skill shooter 2,2140,Mostly Positive,17.0,70.0,"[Action, Action RPG, Arcade, FPS, First-Person...",2684300_ss_46ae97a9290948442fcb2418af5c161f856...
41787,2684300,2024-01-04,Just skill shooter 2,2140,Mostly Positive,17.0,70.0,"[Action, Action RPG, Arcade, FPS, First-Person...",2684300_ss_9031aae2fe1d233dbcc1269a2486fd86d87...
41788,2684300,2024-01-04,Just skill shooter 2,2140,Mostly Positive,17.0,70.0,"[Action, Action RPG, Arcade, FPS, First-Person...",2684300_ss_a182b3e3e54a88360f3d0ec05efd8d64a28...
41789,2684300,2024-01-04,Just skill shooter 2,2140,Mostly Positive,17.0,70.0,"[Action, Action RPG, Arcade, FPS, First-Person...",2684300_ss_b61bdccd5592ed797378e1a50985c6eb45c...


In [30]:
# Exploration and minor cleaning
print(train_data.columns)

print(train_data["sentiment"].value_counts())

# Number of rows per appid; any count above 1 means the same game is listed more than once.
# (Testing for a count of exactly 2 would miss an appid that occurs three or more times.)
dupe = train_data["appid"].value_counts()
if (dupe > 1).any():
    print("There is at least one appid that appears more than once.")
else:
    print("No appid appears more than once.")

Index(['appid', 'release', 'title', 'price', 'sentiment', 'reviews',
       'percentage', 'tags', 'screenshots'],
      dtype='object')
sentiment
Positive                   1650
Very Positive              1148
Mixed                       736
Mostly Positive             664
Overwhelmingly Positive     109
Mostly Negative              89
Negative                      8
Very Negative                 1
Name: count, dtype: int64
No value has a count of 2.


In [None]:
# From this point on the notebook is memory-intensive: it does not run with 16 GB of RAM on the original images, so an external Python script was used to resize them to 256x256.

In [31]:
# Initialize the transformation function.
# ToTensor() already rescales 8-bit pixel values from [0, 255] to [0.0, 1.0], so the
# per-channel mean/std passed to Normalize must be on that scale: 0.5 / 0.5 maps the
# data to [-1, 1]. (A mean/std of 127.5 would squash every pixel into about [-1.0, -0.992].)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     ])

In [None]:
# Open each screenshot with PIL, convert it to RGB and store the transformed result
train_df_expanded["images"] = None

# Only the first half of the rows is processed
half_count = len(train_df_expanded["screenshots"]) // 2
for row_label in range(half_count):
    screenshot_filename = train_df_expanded["screenshots"][row_label]
    image_path = f"C:/Users/Beste/Desktop/AAB/images/{screenshot_filename}"
    if not exists(image_path):
        # No file on disk for this row: leave "images" as None
        continue
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB')
        train_df_expanded.at[row_label, "images"] = transform(rgb_image)

# Drop every row that ended up without an image
train_df_expanded = train_df_expanded.dropna(subset=["images"])

In [32]:
# Create different files
def categorize_files(dataframe, source_folder, destination_folder):
    """Copy each screenshot into a sub-folder named after its sentiment.

    Args:
        dataframe: DataFrame with a 'screenshots' column (file names relative
            to ``source_folder``) and a 'sentiment' column (category labels).
        source_folder: Folder that contains the files to categorize.
        destination_folder: Folder under which one sub-folder per sentiment
            label is created and filled.

    Rows whose file name or sentiment is missing (not a string), and rows whose
    file does not exist in ``source_folder``, are skipped.
    """
    # Create destination folders if they don't exist (NaN labels cannot name a folder)
    for category in dataframe['sentiment'].dropna().unique():
        os.makedirs(os.path.join(destination_folder, category), exist_ok=True)

    # Walk the two relevant columns in parallel instead of materializing every row
    for filename, category in zip(dataframe['screenshots'], dataframe['sentiment']):
        # explode() yields NaN for an empty screenshot list; os.path.join would
        # raise TypeError on such a value, so skip anything that is not a string.
        if not isinstance(filename, str) or not isinstance(category, str):
            continue
        source_file = os.path.join(source_folder, filename)
        if os.path.isfile(source_file):
            shutil.copy(source_file, os.path.join(destination_folder, category))

if __name__ == "__main__":

    # Flat folder holding all the files that need to be categorized
    source_folder = "C:/Users/Beste/Desktop/AAB/images"

    # Root folder that receives the categorized copies
    destination_folder = "C:/Users/Beste/Desktop/AAB/train"

    categorize_files(
        dataframe=train_df_expanded,
        source_folder=source_folder,
        destination_folder=destination_folder,
    )
    print("Files categorized successfully!")

Files categorized successfully!


In [23]:
# Build the dataset from the 'train' folder. The transform is passed in so that
# samples are returned as normalized tensors; without it ImageFolder yields PIL
# images, which the DataLoader's default collation cannot stack into a batch.
train_dataset = torchvision.datasets.ImageFolder(root='train', transform=transform)

In [16]:
# Create data loaders
# DataLoader is reached through the already imported `torch` package, and the
# placeholder argument is replaced by an explicit small, shuffled batch.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)


In [25]:
def imshow(img):
    """Undo the normalization of an image tensor and display it with matplotlib."""
    # Map the values back with x * 0.5 + 0.5
    unnormalized = img / 2 + 0.5
    # Move axis 0 to the end before plotting (assumes a channels-first image)
    channels_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

# Show the first batch only: plot its images as one grid and print their class names
for images, labels in train_loader:
    # Call function on your images here
    imshow(torchvision.utils.make_grid(images))

    # Print the class of each image in the batch. The class names come from the
    # ImageFolder dataset's `.classes` list, indexed by each sample's integer label
    # (a bare `classes` name is not defined anywhere in the notebook as shown).
    batch_size = images.size(0)
    print(' '.join('%s' % train_dataset.classes[labels[j]] for j in range(batch_size)))
    break  # Exit the loop after displaying the first batch

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Beste/Desktop/AAB/images\\1623120'