In [1]:
import torch
import cv2
import numpy as np
import os
from src.system.interface import AnnotatorInterface
import torch.nn as nn
import pytorch_lightning as pl



# Load the model
model = torch.load('./weights/fastpose.pth', map_location=torch.device('cpu'))

  from .autonotebook import tqdm as notebook_tqdm
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [10]:
def train_dataloader(self):
        # train loader
        train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True
        )
        return train_loader

def val_dataloader(self):
    # validation loader
    val_loader = torch.utils.data.DataLoader(
        self.val_dataset,
        batch_size=self.batch_size,
        shuffle=False
    )
    return val_loader

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

TOT_ACTION_CLASSES = 2

class ActionClassificationLSTM(nn.Module):
    def __init__(self, input_features, hidden_dim, learning_rate=0.001):
        super().__init__()
        self.lstm = nn.LSTM(input_features, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, TOT_ACTION_CLASSES)
        self.learning_rate = learning_rate

    def forward(self, x):
        lstm_out, (ht, ct) = self.lstm(x)
        return self.linear(ht[-1])

    def training_step(self, batch):
        x, y = batch
        y = torch.squeeze(y).long()
        y_pred = self(x)
        loss = F.cross_entropy(y_pred, y)
        prob = F.softmax(y_pred, dim=1)
        pred = prob.data.max(dim=1)[1]
        acc = (pred == y).float().mean()
        return loss, acc

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True
        )

    def val_dataloader(self):
        return torch.utils.data.DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False
        )

In [18]:
import numpy as np
import torch

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, x_file, y_file):
        self.x = np.loadtxt(x_file)
        self.y = np.loadtxt(y_file, dtype=int)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return torch.FloatTensor(self.x[idx]), torch.LongTensor([self.y[idx]])

In [19]:
def train(model, train_loader, num_epochs, device):
    optimizer = optim.Adam(model.parameters(), lr=model.learning_rate)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_acc = 0
        for batch in train_loader:
            x, y = [b.to(device) for b in batch]
            optimizer.zero_grad()
            loss, acc = model.training_step((x, y))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_acc += acc.item()

        train_loss /= len(train_loader)
        train_acc /= len(train_loader)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")

In [20]:
# Usage
input_features = 64  # Adjust based on your data
hidden_dim = 128  # Adjust as needed
batch_size = 32  # Adjust as needed

# Paths to your data files
x_file = '/home/goyal/Desktop/fastpose/keypoints/x.txt'
y_file = '/home/goyal/Desktop/fastpose/keypoints/y.txt'

# Create dataset and dataloader
train_dataset = CustomDataset(x_file, y_file)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)

# Get input_features from the data
input_features = train_dataset.x.shape[1]

# Create model
model = ActionClassificationLSTM(input_features, hidden_dim)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Train
num_epochs = 100  # Adjust as needed
train(model, train_loader, num_epochs, device)

ValueError: Wrong number of columns at line 2

In [15]:
dataset_dir = "data/images"
image_files = [f for f in os.listdir(dataset_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
# Limit to the first 100 images
image_files = image_files[:2000]

In [16]:
def draw_results(image, person):
    # Draw bounding box
    bbox = person['bbox']
    x1, y1 = int(bbox.x), int(bbox.y)
    x2, y2 = int(bbox.x + bbox.width), int(bbox.y + bbox.height)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Draw keypoints if available
    if person['pose_2d'] is not None:
        keypoints = person['pose_2d'].get_joints()
        for i, (x, y) in enumerate(keypoints):
            confidence = person['confidence'][i]
            if confidence > 0.5:  # You can adjust this threshold
                cv2.circle(image, (int(x), int(y)), 3, (0, 0, 255), -1)

    return image


In [17]:
annotator = AnnotatorInterface.build(max_persons=5)

layer     filters    size              input                output
    0 conv     16  3 x 3 / 1   416 x 416 x   3   ->   416 x 416 x  16
    1 max          2 x 2 / 2   416 x 416 x  16   ->   208 x 208 x  16
    2 conv     32  3 x 3 / 1   208 x 208 x  16   ->   208 x 208 x  32
    3 max          2 x 2 / 2   208 x 208 x  32   ->   104 x 104 x  32
    4 conv     64  3 x 3 / 1   104 x 104 x  32   ->   104 x 104 x  64
    5 max          2 x 2 / 2   104 x 104 x  64   ->    52 x  52 x  64
    6 conv    128  3 x 3 / 1    52 x  52 x  64   ->    52 x  52 x 128
    7 max          2 x 2 / 2    52 x  52 x 128   ->    26 x  26 x 128
    8 conv    256  3 x 3 / 1    26 x  26 x 128   ->    26 x  26 x 256
    9 max          2 x 2 / 2    26 x  26 x 256   ->    13 x  13 x 256
   10 conv    512  3 x 3 / 1    13 x  13 x 256   ->    13 x  13 x 512
   11 max          2 x 2 / 1    13 x  13 x 512   ->    13 x  13 x 512
   12 conv   1024  3 x 3 / 1    13 x  13 x 512   ->    13 x  13 x1024
   13 conv   1024  3 x 

In [18]:
keypoints_file = "keypoints/x.txt"
labels_file = "keypoints/y.txt"

with open(keypoints_file, "w") as kp_file, open(labels_file, "w") as label_file:
    for image_file in image_files:
        image_path = os.path.join(dataset_dir, image_file)
        image = cv2.imread(image_path)
        
        if image is None:
            print(f"Failed to read image: {image_file}")
            continue
        
        # Perform detection and keypoint estimation
        persons = annotator.update(image)
        
        if persons:
            for person in persons:
                # Extract 2D keypoints
                if person['pose_2d'] is not None:
                    keypoints_2d = person['pose_2d'].get_joints().flatten().tolist()
                    
                    # Write keypoints to file
                    kp_file.write(" ".join(map(str, keypoints_2d)) + "\n")
                    
                    # Write label (image filename) to file
                    label_file.write(image_file + "\n")
            
            # Visualize results
            visualized_image = image.copy()
            for person in persons:
                try:
                    visualized_image = draw_results(visualized_image, person)
                except Exception as e:
                    print(f"Error drawing results for {image_file}: {str(e)}")
                    print(f"Person data: {person}")
            
            # Save the visualized image
            output_path = os.path.join('./output/', f"annotated_{image_file}")
            cv2.imwrite(output_path, visualized_image)
            
            print(f"Processed {image_file} - Persons detected: {len(persons)}")
        else:
            print(f"Processed {image_file} - No persons detected")

# Terminate the annotator
annotator.terminate()
 
        
 

Processed 000000461129.jpg - No persons detected


Processed 000000437810.jpg - No persons detected
Processed 000000473489.jpg - No persons detected
Processed 000000386540.jpg - No persons detected
Processed 000000494716.jpg - No persons detected
Processed 000000061982.jpg - No persons detected
Processed 000000476802.jpg - No persons detected
Processed 000000317684.jpg - No persons detected
Processed 000000349511.jpg - No persons detected
Processed 000000562904.jpg - No persons detected
Processed 000000457720.jpg - No persons detected
Processed 000000365066.jpg - No persons detected
Processed 000000114653.jpg - No persons detected
Processed 000000126144.jpg - No persons detected
Processed 000000547738.jpg - No persons detected
Processed 000000457087.jpg - No persons detected
Processed 000000310128.jpg - No persons detected
Processed 000000537128.jpg - No persons detected
Processed 000000183809.jpg - No persons detected
Processed 000000374374.jpg - No persons detected
Processed 000000137256.jpg - No persons detected
Processed 0000001250

  cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0,1))).data


Processed 000000414067.jpg - No persons detected
Processed 000000314791.jpg - No persons detected
Processed 000000349170.jpg - No persons detected
Processed 000000399973.jpg - No persons detected
Processed 000000499922.jpg - No persons detected
Processed 000000369460.jpg - No persons detected
Processed 000000142779.jpg - No persons detected
Processed 000000320979.jpg - No persons detected
Processed 000000139105.jpg - No persons detected
Processed 000000348730.jpg - No persons detected
Processed 000000417961.jpg - No persons detected
Processed 000000368220.jpg - No persons detected
Processed 000000441058.jpg - No persons detected
Processed 000000364247.jpg - No persons detected
Processed 000000356922.jpg - No persons detected
Processed 000000571550.jpg - No persons detected
Processed 000000051674.jpg - No persons detected
Processed 000000084929.jpg - No persons detected
Processed 000000144695.jpg - No persons detected
Processed 000000458594.jpg - No persons detected
Processed 0000004780

In [20]:
# Print summary
print(f"\nTotal images processed: {len(image_files)}")
print(f"Images with detected keypoints: {sum(1 for line in open(labels_file))}")


Total images processed: 2000
Images with detected keypoints: 44
