In [None]:
pip install torch torchvision opencv-python pycuda


In [None]:
import torch
import torchvision.transforms as transforms
from torchvision.models import vgg19

# Pre-trained VGG19: keep only the convolutional `features` stack and switch
# it to inference mode (eval() toggles the mode in place).
model = vgg19(pretrained=True).features
model.eval()

# Input pipeline applied to every image/frame before it reaches the model:
# resize to 224x224, convert to a tensor, then normalise each channel with
# the mean / std values given below.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [None]:
# Move the model to the GPU when CUDA is available; otherwise keep it on the
# CPU so this cell does not fail on machines without a usable GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Function to perform style transfer on a single frame
def style_transfer(frame, model):
    """Run ``model`` on a single frame and return the result as a PIL image.

    The frame goes through the module-level ``preprocess`` pipeline, is fed to
    ``model`` without gradient tracking, and the output is converted back to a
    PIL image on the CPU.

    NOTE(review): the ``model`` built in this notebook is the VGG19
    ``features`` stack, i.e. a feature extractor rather than a stylisation
    network. Its output has far more than 4 channels, which ``ToPILImage``
    cannot render, so such outputs are shown as a channel-averaged activation
    map instead of a stylised picture.

    Parameters
    ----------
    frame : PIL.Image.Image or numpy.ndarray
        Input picture. NumPy arrays are converted to PIL first because
        ``preprocess`` begins with ``Resize``.
        Assumes arrays are H x W x C uint8 RGB, as produced by the capture
        loop -- TODO confirm for other callers.
    model : torch.nn.Module
        Network applied to the preprocessed frame.

    Returns
    -------
    PIL.Image.Image
        The model output as an image. Outputs with more than 4 channels are
        returned as a 3-channel grey-scale activation map.
    """
    import numpy as np  # local import: the notebook imports numpy only in a later cell

    # `preprocess` expects a PIL image; frames coming from OpenCV are ndarrays.
    if isinstance(frame, np.ndarray):
        frame = transforms.ToPILImage()(frame)

    # Follow the model's device instead of assuming CUDA is present.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Preprocess the frame, add the batch dimension and move it to the model's device
    input_tensor = preprocess(frame).unsqueeze(0).to(target_device)

    with torch.no_grad():
        output_tensor = model(input_tensor)

    # Drop the batch dimension and move back to the CPU
    output_tensor = output_tensor.squeeze(0).cpu()

    # ToPILImage only handles up to 4 channels. Collapse wider feature maps to
    # a single min-max normalised activation map and replicate it over three
    # channels so callers that expect an RGB image keep working.
    if output_tensor.dim() == 3 and output_tensor.shape[0] > 4:
        activation = output_tensor.float().mean(dim=0, keepdim=True)
        activation = activation - activation.min()
        peak = activation.max()
        if peak > 0:
            activation = activation / peak
        output_tensor = activation.repeat(3, 1, 1)

    output_image = transforms.ToPILImage()(output_tensor)
    return output_image


In [None]:
import cv2
import numpy as np  # used below; the rest of the notebook imports it only in a later cell

# Initialize video capture (use 0 for webcam, or specify video file path)
cap = cv2.VideoCapture(0)

try:
    while True:
        # Read into a separate name so that a failed read does not overwrite
        # the last good `frame`, which the benchmark cell reuses.
        ret, bgr_frame = cap.read()
        if not ret:
            break

        # Convert frame to RGB (OpenCV uses BGR by default)
        frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

        # Apply the style transfer model
        output_image = style_transfer(frame, model)

        # Convert output image back to BGR for displaying. Force RGB mode
        # first so the colour conversion always receives three channels.
        output_image = cv2.cvtColor(
            np.array(output_image.convert("RGB")), cv2.COLOR_RGB2BGR
        )

        # Display the output frame
        cv2.imshow('Neural Style Transfer', output_image)

        # Stop when the user presses "q"
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera and the windows back, even if a frame failed
    # to process or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import numpy as np

# Example CUDA kernel for a custom operation (e.g., element-wise multiplication)
#
# Each thread derives one global index from its block and thread coordinates
# and writes c[idx] = a[idx] * b[idx]. The `idx < n` guard keeps threads whose
# index falls past the end of the arrays from touching memory.
# All three pointers are declared `float *`, so the kernel works on 32-bit floats.
cuda_code = """
__global__ void elementwise_multiply(float *a, float *b, float *c, int n) {
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx < n) {
        c[idx] = a[idx] * b[idx];
    }
}
"""

# Build a module from the kernel source and look up the kernel by name;
# `multiply` is the handle that gpu_multiply launches.
mod = SourceModule(cuda_code)
multiply = mod.get_function("elementwise_multiply")

# Example function to use the CUDA kernel
def gpu_multiply(a, b):
    """Multiply two arrays element-wise with the ``elementwise_multiply`` kernel.

    Parameters
    ----------
    a, b : array_like
        Operands with identical shapes. Both are converted to contiguous
        ``float32`` arrays, because the kernel reads its inputs through raw
        ``float *`` pointers; passing e.g. ``float64`` data unconverted would
        be reinterpreted bit-wise and give wrong results.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with the shape of ``a`` holding ``a * b``.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` do not have the same shape (the kernel indexes both
        with the same index and would read out of bounds otherwise).
    """
    a = np.ascontiguousarray(a, dtype=np.float32)
    b = np.ascontiguousarray(b, dtype=np.float32)
    if a.shape != b.shape:
        raise ValueError(
            f"gpu_multiply requires operands of equal shape, got {a.shape} and {b.shape}"
        )

    n = a.size
    c = np.empty_like(a)

    # Nothing to compute; also avoids a zero-byte allocation and an empty grid.
    if n == 0:
        return c

    allocations = []
    try:
        # Allocate GPU memory
        for host_array in (a, b, c):
            allocations.append(cuda.mem_alloc(host_array.nbytes))
        a_gpu, b_gpu, c_gpu = allocations

        # Copy data to GPU
        cuda.memcpy_htod(a_gpu, a)
        cuda.memcpy_htod(b_gpu, b)

        # Launch the kernel with enough blocks to give every element a thread
        block_size = 256
        grid_size = (n + block_size - 1) // block_size
        multiply(a_gpu, b_gpu, c_gpu, np.int32(n), block=(block_size, 1, 1), grid=(grid_size, 1))

        # Copy the result back to CPU
        cuda.memcpy_dtoh(c, c_gpu)
    finally:
        # Release device memory right away instead of waiting for garbage collection.
        for allocation in allocations:
            allocation.free()

    return c


In [None]:
import time

import numpy as np

NUM_BENCHMARK_FRAMES = 100  # number of frames to process in the timed loop

# Reuse the last frame left behind by the capture cell when there is one;
# otherwise fall back to a synthetic black RGB frame so this cell does not
# depend on the webcam having delivered a frame.
benchmark_frame = globals().get("frame")
if benchmark_frame is None:
    benchmark_frame = np.zeros((480, 640, 3), dtype=np.uint8)

# Warm-up call so one-time initialisation cost is not attributed to the loop.
style_transfer(benchmark_frame, model)

# perf_counter is monotonic and high-resolution, which suits interval timing.
# style_transfer returns a CPU-side image, so each call has finished its work
# by the time it returns.
start_time = time.perf_counter()
for frame_index in range(NUM_BENCHMARK_FRAMES):
    style_transfer(benchmark_frame, model)
end_time = time.perf_counter()

print(f"Processed {NUM_BENCHMARK_FRAMES} frames in {end_time - start_time:.2f} seconds")
