<a href="https://colab.research.google.com/github/yoshi70001/Pagina-Web/blob/master/captcha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont
import random
import string
import os
from tqdm import tqdm

class StyledCaptchaGenerator:
    """Generate synthetic CAPTCHA images: coloured glyphs over pastel bubbles.

    Each image is a white RGB canvas sprinkled with random pastel circles.
    Every character is drawn on its own pastel bubble with a white glow and
    a dark fill colour. TrueType fonts are picked at random from the
    ``fonts`` directory, which is created on construction if missing and
    must contain at least one ``.ttf`` file before generating.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        char_length: Number of characters per CAPTCHA.
    """

    def __init__(self,
                 width=200,
                 height=50,
                 char_length=6):
        self.width = width
        self.height = height
        self.char_length = char_length
        # Uppercase letters and digits only, matching the reference images.
        self.chars = string.ascii_uppercase + string.digits
        self.font_sizes = (35, 38, 40)  # Narrow range keeps glyph sizes consistent
        self.background_colors = [
            (200, 255, 200),  # Light green
            (200, 200, 255),  # Light blue
            (255, 200, 255),  # Light purple
        ]
        self.text_colors = [
            (0, 100, 0),      # Dark green
            (0, 0, 100),      # Dark blue
            (100, 0, 100),    # Dark purple
        ]

        self.fonts_dir = 'fonts'
        os.makedirs(self.fonts_dir, exist_ok=True)

        # (font path, size) -> loaded font, so each font file is parsed once
        # instead of once per character.
        self._font_cache = {}

    def _create_bubble_background(self):
        """Return a white RGB canvas decorated with 15 random pastel circles."""
        image = Image.new('RGB', (self.width, self.height), 'white')
        draw = ImageDraw.Draw(image)

        for _ in range(15):
            x = random.randint(0, self.width)
            y = random.randint(0, self.height)
            size = random.randint(10, 30)
            color = random.choice(self.background_colors)
            draw.ellipse([x, y, x + size, y + size], fill=color)

        return image

    def _load_font(self, font_path, font_size):
        """Return the font for ``(font_path, font_size)``, loading it at most once."""
        key = (font_path, font_size)
        font = self._font_cache.get(key)
        if font is None:
            font = ImageFont.truetype(font_path, font_size)
            self._font_cache[key] = font
        return font

    def generate_captcha(self):
        """Render one CAPTCHA.

        Returns:
            A ``(image, text)`` tuple: the PIL image and the string it shows.

        Raises:
            FileNotFoundError: If the fonts directory has no ``.ttf`` file.
        """
        image = self._create_bubble_background()
        draw = ImageDraw.Draw(image)

        text = ''.join(random.choices(self.chars, k=self.char_length))

        x_offset = 20
        for char in text:
            font_size = random.choice(self.font_sizes)
            font = self._load_font(self._get_random_font(), font_size)
            color = random.choice(self.text_colors)

            # Paint the bubble first: it is an opaque fill, so anything drawn
            # before it (the glow in particular) would be painted over.
            size = font_size + 10
            draw.ellipse([x_offset - 5, 5, x_offset + size, size + 10],
                         fill=random.choice(self.background_colors),
                         outline=None)

            # White glow, offset slightly down and to the right of the glyph.
            for offset in range(2):
                draw.text((x_offset + offset, 10 + offset), char,
                          font=font, fill=(255, 255, 255))

            # Main glyph on top of bubble and glow.
            draw.text((x_offset, 10), char, font=font, fill=color)

            # Advance by less than the font size so glyphs sit close together.
            x_offset += font_size - 10

        return image, text

    def _get_random_font(self):
        """Return the path of a randomly chosen ``.ttf`` file in the fonts dir.

        The extension match is case-insensitive.

        Raises:
            FileNotFoundError: If the directory contains no TrueType font.
        """
        fonts = [f for f in os.listdir(self.fonts_dir)
                 if f.lower().endswith('.ttf')]
        if not fonts:
            raise FileNotFoundError(
                "No se encontraron fuentes TTF en el directorio 'fonts'")
        return os.path.join(self.fonts_dir, random.choice(fonts))

def generate_dataset(num_samples, output_dir="styled_captcha_dataset"):
    """Generate ``num_samples`` CAPTCHA images plus a ``labels.csv`` index.

    Images are written to ``<output_dir>/images/captcha_NNNNNN.png``. The CSV
    has no header and one ``image_path,text`` row per image.

    Args:
        num_samples: Number of CAPTCHAs to generate.
        output_dir: Root directory of the dataset; created if missing.
    """
    import csv  # local import: only this function needs it

    # Create both levels unconditionally. Previously "images" was only
    # created when output_dir itself did not exist, so a pre-existing
    # output_dir without the sub-directory made image.save() fail.
    images_dir = os.path.join(output_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    generator = StyledCaptchaGenerator()

    rows = []
    print(f"Generando {num_samples} CAPTCHAs...")
    for i in tqdm(range(num_samples)):
        image, text = generator.generate_captcha()
        image_path = os.path.join(images_dir, f"captcha_{i:06d}.png")
        image.save(image_path)
        rows.append((image_path, text))

    # csv.writer quotes fields when needed (e.g. a comma in output_dir), which
    # a plain ",".join would silently turn into a malformed row.
    labels_path = os.path.join(output_dir, "labels.csv")
    with open(labels_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f, lineterminator="\n").writerows(rows)

    print(f"Dataset generado en {output_dir}")

# Entry point: generate a 15,000-sample dataset in the default output directory.
if __name__ == "__main__":
    generate_dataset(15000)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
import string
from tqdm import tqdm
import numpy as np

class CaptchaDataset(Dataset):
    """Dataset of CAPTCHA images and their one-hot encoded label strings.

    Args:
        labels_file: Header-less CSV with two columns: image path and text.
        transform: Optional callable applied to the grayscale PIL image.
    """

    def __init__(self, labels_file, transform=None):
        # Read every field as a plain string. With dtype inference an
        # all-digit label column is parsed as integers (dropping leading
        # zeros and breaking len()/iteration), and tokens such as "NA" or
        # "NULL" are turned into NaN.
        self.data = pd.read_csv(
            labels_file,
            header=None,
            names=['image_path', 'text'],
            dtype=str,
            keep_default_na=False,
        )
        self.transform = transform
        self.chars = string.ascii_uppercase + string.digits
        self.char_to_idx = {char: idx for idx, char in enumerate(self.chars)}

    def __len__(self):
        """Return the number of labelled images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        ``image`` is the grayscale image after the optional transform.
        ``target`` is a float tensor of shape ``(len(text), len(self.chars))``
        with a single 1 per row marking that position's character.

        Raises:
            KeyError: If the label contains a character outside ``self.chars``.
        """
        row = self.data.iloc[idx]
        img_path = row['image_path']
        text = row['text']

        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as raw:
            image = raw.convert('L')
        if self.transform:
            image = self.transform(image)

        target = torch.zeros(len(text), len(self.chars))
        for i, char in enumerate(text):
            target[i][self.char_to_idx[char]] = 1

        return image, target

class CaptchaCNN(nn.Module):
    """CNN that classifies every character position of a fixed-length CAPTCHA.

    Four conv/ReLU/BatchNorm/max-pool stages feed a fully connected head
    whose output is reshaped to one score vector per character position.

    Args:
        num_chars: Number of character positions per image.
        num_classes: Size of the alphabet (scores per position).
        image_height: Height of the single-channel input images.
        image_width: Width of the single-channel input images.
    """

    def __init__(self, num_chars, num_classes, image_height=50, image_width=200):
        super().__init__()

        self.num_chars = num_chars
        self.num_classes = num_classes
        self.image_height = image_height
        self.image_width = image_width

        # Convolutional feature extractor; each stage halves height and width.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(2),
        )

        # Flattened feature count produced by conv_layers for one image.
        self.conv_output_size = self._get_conv_output_size()

        # Fully connected classification head.
        self.fc_layers = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(self.conv_output_size, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, num_chars * num_classes)
        )

    def _get_conv_output_size(self):
        """Return the number of features conv_layers emits per input image.

        The probe runs in eval mode under ``no_grad``: a training-mode pass
        would update the BatchNorm running statistics with meaningless data
        before any real training has happened.
        """
        was_training = self.conv_layers.training
        self.conv_layers.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 1, self.image_height, self.image_width)
                features = self.conv_layers(probe)
        finally:
            self.conv_layers.train(was_training)
        return features.numel() // features.size(0)

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, num_chars, num_classes)`` scores."""
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        # One score vector per character position.
        return x.view(-1, self.num_chars, self.num_classes)

class CaptchaTrainer:
    """Train a ``CaptchaCNN`` on a labelled CAPTCHA dataset.

    Args:
        num_chars: Number of characters per CAPTCHA.
        image_width: Width images are resized to before training.
        image_height: Height images are resized to before training.
    """

    def __init__(self, num_chars=6, image_width=200, image_height=50):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_chars = num_chars
        self.chars = string.ascii_uppercase + string.digits
        self.num_classes = len(self.chars)

        # NOTE(review): train() builds CaptchaCNN without passing these
        # dimensions, so they must match the input size the model assumes.
        self.transform = transforms.Compose([
            transforms.Resize((image_height, image_width)),
            transforms.ToTensor(),
        ])

    def _batch_metrics(self, outputs, labels, criterion):
        """Return ``(loss, n_correct, n_total)`` for one batch.

        ``outputs`` holds per-position class scores and ``labels`` the
        matching one-hot targets. The loss is computed against class indices,
        which is equivalent to one-hot probability targets.
        """
        targets = labels.argmax(dim=2)
        loss = criterion(outputs.reshape(-1, self.num_classes),
                         targets.reshape(-1))
        n_correct = (outputs.argmax(dim=2) == targets).sum().item()
        return loss, n_correct, targets.numel()

    def train(self, dataset_path, batch_size=32, epochs=50, learning_rate=0.001):
        """Train with an 80/20 train/validation split and early stopping.

        The weights with the lowest validation loss are saved to
        ``best_model.pth``. Training stops after 10 epochs without
        improvement, and the learning rate is reduced when the validation
        loss plateaus.

        Args:
            dataset_path: Path to the labels CSV consumed by ``CaptchaDataset``.
            batch_size: Mini-batch size for both loaders.
            epochs: Maximum number of epochs.
            learning_rate: Initial Adam learning rate.

        Returns:
            The model as it stands after the last epoch run. This is not
            necessarily the best checkpoint, which is the one saved on disk.
        """
        dataset = CaptchaDataset(dataset_path, transform=self.transform)
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

        # BatchNorm1d cannot run in training mode on a single sample, so drop
        # the trailing batch when it would contain exactly one image.
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  drop_last=(train_size % batch_size == 1))
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        model = CaptchaCNN(self.num_chars, self.num_classes).to(self.device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

        best_loss = float('inf')
        patience = 10
        patience_counter = 0

        for epoch in range(epochs):
            # ---- Training ----
            model.train()
            train_loss = 0.0
            train_correct = 0
            train_total = 0

            pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}')
            # Count batches explicitly: tqdm only refreshes ``pbar.n`` at
            # display intervals, so dividing by it gives a wrong running mean.
            for batch_idx, (images, labels) in enumerate(pbar, start=1):
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = model(images)
                loss, correct, total = self._batch_metrics(outputs, labels, criterion)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                train_correct += correct
                train_total += total

                pbar.set_postfix({'loss': train_loss / batch_idx,
                                  'accuracy': train_correct / train_total})

            # ---- Validation ----
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(self.device), labels.to(self.device)
                    outputs = model(images)
                    loss, correct, total = self._batch_metrics(outputs, labels, criterion)
                    val_loss += loss.item()
                    val_correct += correct
                    val_total += total

            val_loss = val_loss / len(val_loader)
            val_accuracy = val_correct / val_total

            print(f'\nValidation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')

            # Early stopping: checkpoint on improvement, otherwise count up.
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), 'best_model.pth')
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print("Early stopping triggered")
                break

            scheduler.step(val_loss)

        return model

# Entry point: train with the trainer's default settings on the generated labels file.
if __name__ == "__main__":
    trainer = CaptchaTrainer()
    model = trainer.train("styled_captcha_dataset/labels.csv")

In [None]:
import string
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image

class CaptchaTester:
    """Run inference with a trained ``CaptchaCNN`` checkpoint.

    Args:
        model_path: Path to a ``state_dict`` checkpoint of the model.
        image_height: Height images are resized to before inference.
        image_width: Width images are resized to before inference.
        num_chars: Number of character positions the checkpoint was trained
            for.
    """

    def __init__(self, model_path: str, image_height: int = 50,
                 image_width: int = 200, num_chars: int = 6):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.chars = string.ascii_uppercase + string.digits
        self.char_to_idx = {char: idx for idx, char in enumerate(self.chars)}
        self.idx_to_char = {idx: char for char, idx in self.char_to_idx.items()}

        # Rebuild the architecture, then load the trained weights into it.
        self.model = CaptchaCNN(num_chars=num_chars, num_classes=len(self.chars)).to(self.device)
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.eval()

        # Same preprocessing as used for training: resize, then to tensor.
        self.transform = transforms.Compose([
            transforms.Resize((image_height, image_width)),
            transforms.ToTensor(),
        ])

    def preprocess_image(self, image_path: str) -> torch.Tensor:
        """Load an image as grayscale and return it as a batch of one on the device."""
        image = Image.open(image_path).convert('L')
        image = self.transform(image)
        return image.unsqueeze(0).to(self.device)

    def predict(self, image_path: str) -> Tuple[str, List[float]]:
        """Predict the CAPTCHA text of one image.

        Returns:
            ``(text, confidences)``, where ``confidences[i]`` is the softmax
            probability of the character predicted at position ``i``.
        """
        image = self.preprocess_image(image_path)

        with torch.no_grad():
            outputs = self.model(image)
            probabilities = torch.softmax(outputs, dim=2)
            predictions = torch.argmax(outputs, dim=2)

        char_indices = predictions[0].tolist()
        predicted_text = ''.join(self.idx_to_char[idx] for idx in char_indices)
        confidence_scores = [
            probabilities[0, position, idx].item()
            for position, idx in enumerate(char_indices)
        ]

        return predicted_text, confidence_scores

    def visualize_prediction(self, image_path: str, save_path: Optional[str] = None):
        """Show the image next to a bar chart of per-character confidences.

        Args:
            image_path: Image to predict and display.
            save_path: If given, the figure is also saved to this path.
        """
        text, confidences = self.predict(image_path)

        image = Image.open(image_path)
        plt.figure(figsize=(12, 4))

        # Left panel: the CAPTCHA itself.
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.title(f'Predicción: {text}')
        plt.axis('off')

        # Right panel: one bar per character position. Bars are placed at
        # integer positions and labelled with the characters; using the
        # characters themselves as x values would stack repeated characters
        # onto a single bar.
        plt.subplot(1, 2, 2)
        plt.bar(range(len(text)), confidences, tick_label=list(text))
        plt.title('Probabilidades por carácter')
        plt.ylim(0, 1)

        if save_path:
            plt.savefig(save_path)
        plt.show()

    def batch_test(self, image_paths: List[str]) -> List[Tuple[str, str, List[float]]]:
        """Predict several images, printing a short report for each.

        Returns:
            One ``(path, text, confidences)`` tuple per input path.
        """
        results = []
        for path in image_paths:
            text, confidences = self.predict(path)
            results.append((path, text, confidences))
            print(f"Imagen: {path}")
            print(f"Texto predicho: {text}")
            print(f"Confianza promedio: {sum(confidences)/len(confidences):.2%}\n")
        return results

def test_model(model_path='best_model.pth',
               image_path="/content/styled_captcha_dataset/images/captcha_004000.png",
               test_images=None):
    """Demonstrate inference: one prediction, one plot and a small batch run.

    Args:
        model_path: Checkpoint to load into the tester.
        image_path: Image used for the single prediction and the plot.
        test_images: Paths for the batch run; defaults to three sample images
            from the generated dataset.
    """
    if test_images is None:
        test_images = [
            "/content/styled_captcha_dataset/images/captcha_004000.png",
            "/content/styled_captcha_dataset/images/captcha_001000.png",
            "/content/styled_captcha_dataset/images/captcha_003000.png"
        ]

    tester = CaptchaTester(
        model_path=model_path,
        image_height=50,
        image_width=200
    )

    # Single-image prediction.
    predicted_text, confidences = tester.predict(image_path)
    print(f"Texto predicho: {predicted_text}")
    print(f"Confianzas: {[f'{conf:.2%}' for conf in confidences]}")

    # Plot the same image with its per-character confidences.
    tester.visualize_prediction(image_path)

    # Batch run; batch_test prints its own per-image report.
    tester.batch_test(test_images)

# Entry point: run the inference demo.
if __name__ == "__main__":
    test_model()