In [1]:
import os
import numpy as np
import cv2
import pandas as pd

### Pipeline para converter imagens e salvar como CSV rotulado.

- Rótulos de `Jumping`:  
    - 0: not jumping  
    - 1: jumping  

---

Configurações gerais:

In [13]:
# --- Input: one folder per class, both resolved against the current working directory ---
JUMPING_PATH = os.path.join(os.getcwd(), 'jumping')
NO_JUMPING_PATH = os.path.join(os.getcwd(), 'not_jumping')

# --- Preprocessing ---
# Size handed to cv2.resize for every image.
RESOLUTION = (128, 128)
# Pixel divisor: 255.0 scales pixel values into the 0-1 range; use 1 to disable normalization.
NORMALIZATION = 255.0

# --- Output: file that receives the labelled dataset ---
CSV_PATH = 'dataset-new.csv'

Função de extração dos dados das imagens

In [14]:
def imagesDataset(folder_path, label):
    """Load every readable image of a folder as a flattened, labelled row.

    Each image is read in grayscale, resized to ``RESOLUTION``, divided by
    ``NORMALIZATION`` and flattened to 1-D; ``label`` is appended as the last
    element of the row.

    Args:
        folder_path: Directory whose entries are tried as image files.
        label: Class label stored in the last column of every row.

    Returns:
        A 2-D ``numpy.ndarray`` with one row per readable image and
        ``RESOLUTION[0] * RESOLUTION[1] + 1`` columns. When no entry could be
        read as an image, the array has zero rows but keeps that column
        count, so it can still be stacked with the result of another folder.
    """
    n_columns = RESOLUTION[0] * RESOLUTION[1] + 1
    rows = []

    # os.listdir yields entries in arbitrary order; sorting makes the row
    # order (and therefore the exported dataset) reproducible.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

        # Entries that OpenCV cannot read as an image are skipped.
        if img is None:
            continue

        img = cv2.resize(img, RESOLUTION)
        reshaped = (img / NORMALIZATION).reshape(-1)

        rows.append(np.append(reshaped, label))

    if not rows:
        # np.array([]) would have shape (0,), which cannot be vstack-ed with
        # a non-empty class; return an empty matrix of the right width instead.
        return np.empty((0, n_columns))
    return np.array(rows)

Convertendo imagens em um DataFrame do pandas

In [15]:
# One labelled pixel matrix per class (1 = jumping, 0 = not jumping).
data_jumping = imagesDataset(JUMPING_PATH, 1)
data_no_jumping = imagesDataset(NO_JUMPING_PATH, 0)

# Stack the classes row-wise, jumping rows first, and wrap the result in a DataFrame.
dataset_total = np.vstack((data_jumping, data_no_jumping))
df = pd.DataFrame(data=dataset_total)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16375,16376,16377,16378,16379,16380,16381,16382,16383,16384
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.011765,0.0,0.000000,0.007843,0.0,0.003922,0.0


Renomeando as colunas: os pixels recebem o próprio índice como nome e a última coluna (o rótulo) passa a se chamar `Jumping`

In [16]:
# Every column except the last one holds a pixel value; the last one holds the label.
num_features = df.shape[1] - 1
# Pixel columns are named after their position ('0', '1', ...); the label column becomes 'Jumping'.
df.columns = [*map(str, range(num_features)), 'Jumping']
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16375,16376,16377,16378,16379,16380,16381,16382,16383,Jumping
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007843,0.000000,0.0,0.000000,0.007843,0.0,0.000000,0.0
2103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.011765,0.0,0.000000,0.007843,0.0,0.003922,0.0


Salvando o dataset em um arquivo CSV

In [17]:
# Write the labelled dataset without the row index, then report how many samples were saved.
df.to_csv(path_or_buf=CSV_PATH, index=False)
print(f"CSV salvo como {CSV_PATH} | Total: {len(df)} amostras")

CSV salvo como dataset-new.csv | Total: 2105 amostras
