In [30]:
import cv2
import numpy as np
import pandas as pd
import os
from PIL import Image

def process_meter_images(data_root='train'):
    """处理CSV文件中指定的所有电表图像，并切分数字"""
    # 创建输出目录
    csv_path = f'{data_root}/data_detect.csv'  # CSV文件路径
    image_dir = f"{data_root}/Dataset"      # 图像文件夹路径
    output_dir=f"{data_root}/Dataset_process1"
    cut_dir=f"{data_root}/cut_num"
    for dir_path in [output_dir, cut_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
    
    # 读取CSV文件
    df = pd.read_csv(csv_path)
    
    results = []
    
    # 遍历CSV中的每一行
    for _, row in df.iterrows():
        filename = row['filename']
        number_str = str(row['number'])  # 保留小数点，用于确定小数点位
        
        # 提取整数部分和小数部分
        if '.' in number_str:
            integer_part, decimal_part = number_str.split('.')
        else:
            integer_part = number_str
            decimal_part = ''
        
        # 构建完整数字字符串（补零到6位）
        full_number = f"{integer_part}{decimal_part}".zfill(6)
        
        xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']
        
        # 构建图像路径
        image_path = os.path.join(image_dir, filename)
        
        try:
            # 处理图像
            original_img, processed_img, digit_roi = process_single_image(image_path, xmin, ymin, xmax, ymax)
            
            # 切分数字（固定6个，索引从右到左）
            cut_digits = split_fixed_digits(digit_roi, full_number, os.path.join(cut_dir, filename))
            
            # 保存结果
            cv2.imwrite(os.path.join(output_dir, f"processed_{filename}"), processed_img)
            cv2.imwrite(os.path.join(output_dir, f"roi_{filename}"), digit_roi)
            
            results.append({
                'filename': filename,
                'number': full_number,
                'image': processed_img,
                'roi': digit_roi,
                'cut_digits': cut_digits
            })
            
            print(f"成功处理: {filename} -> {full_number}")
        except Exception as e:
            print(f"处理失败: {filename}, 错误: {str(e)}")
    
    return results

def process_single_image(image_path, xmin, ymin, xmax, ymax):
    """处理单张图像并提取读数区域"""
    # 读取图像
    img = cv2.imread(image_path)
    
    # 图像预处理 - 去反光
    corrected_img = reduce_reflection(img)
    
    # 提取读数区域
    digit_roi = corrected_img[ymin:ymax, xmin:xmax]
    
    # 增强数字区域对比度
    enhanced_roi = enhance_contrast(digit_roi)
    
    return img, corrected_img, enhanced_roi

def reduce_reflection(img):
    """减少图像反光"""
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    limg = cv2.merge((cl, a, b))
    result = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
    return result

def enhance_contrast(img):
    """增强图像对比度"""
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    
    return opening

def split_fixed_digits(roi, number_str, output_prefix):
    """将ROI固定切分为6个数字区域（索引从右到左）"""
    # 确保数字字符串长度为6
    if len(number_str) > 6:
        number_str = number_str[-6:]  # 截取最后6位
    elif len(number_str) < 6:
        number_str = number_str.zfill(6)  # 左侧补零到6位
    
    # 获取ROI宽度并计算每个数字的大致宽度
    roi_height, roi_width = roi.shape[:2]
    digit_width = roi_width // 6
    
    cut_digits = []
    
    # 切分并保存6个数字（索引从右到左）
    for i in range(6):
        # 从右到左映射索引
        reversed_index = 5 - i
        
        # 计算数字区域的左右边界
        start_x = reversed_index * digit_width
        end_x = (reversed_index + 1) * digit_width
        
        # 提取数字区域并添加边距
        margin = 2
        digit_roi = roi[:, max(0, start_x-margin):min(end_x+margin, roi_width)]
        
        # 获取对应的数字标签
        digit_value = number_str[reversed_index]
        
        # 保存切分的数字（索引从右到左）
        output_path = f"{output_prefix}_{i}_{digit_value}.jpg"
        cv2.imwrite(output_path, digit_roi)
        cut_digits.append((i, digit_value, digit_roi))
    
    return cut_digits


In [31]:
# 处理所有图像
results = process_meter_images('train')
    
print(f"处理完成，共处理{len(results)}张图像")

成功处理: hefei_3188.jpg -> 095759
成功处理: hefei_3190.jpg -> 196520
成功处理: hefei_3191.jpg -> 022627
成功处理: hefei_3192.jpg -> 057020
成功处理: hefei_3194.jpg -> 025660
成功处理: hefei_3195.jpg -> 075097
成功处理: hefei_3196.jpg -> 002723
成功处理: hefei_3198.jpg -> 183045
成功处理: hefei_3199.jpg -> 079032
成功处理: hefei_3200.jpg -> 059815
成功处理: hefei_3203.jpg -> 019230
成功处理: hefei_3204.jpg -> 067635
成功处理: hefei_3205.jpg -> 125458
成功处理: hefei_3208.jpg -> 055985
成功处理: hefei_3209.jpg -> 055467
成功处理: hefei_3210.jpg -> 064021
成功处理: hefei_3212.jpg -> 034674
成功处理: hefei_3213.jpg -> 000827
成功处理: hefei_3214.jpg -> 112906
成功处理: hefei_3216.jpg -> 031849
成功处理: hefei_3217.jpg -> 063259
成功处理: hefei_3218.jpg -> 090412
成功处理: hefei_3220.jpg -> 102934
成功处理: hefei_3221.jpg -> 041353
成功处理: hefei_3222.jpg -> 036731
成功处理: hefei_3224.jpg -> 348976
成功处理: hefei_3225.jpg -> 120402
成功处理: hefei_3226.jpg -> 002084
成功处理: hefei_3228.jpg -> 107204
成功处理: hefei_3229.jpg -> 030739
成功处理: hefei_3230.jpg -> 126352
成功处理: hefei_3232.jpg -> 050302
成功处理: he

In [32]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import pandas as pd
import numpy as np
from PIL import Image

# 定义数据集类
class MeterDigitDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = sorted([f for f in os.listdir(root_dir) if f.endswith('.jpg')])
        
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.image_files[idx])
        image = Image.open(img_path).convert('RGB')
        
        # 从文件名提取真实标签（例如：hefei_3188_0_9.jpg 中的 9）
        parts = self.image_files[idx].split('_')
        true_digit = int(parts[-1].split('.')[0])
        
        if self.transform:
            image = self.transform(image)
            
        return image, true_digit, self.image_files[idx]

In [33]:
import torch
import torch.nn as nn
import torchvision.models as models

class SEModule(nn.Module):
    """通道注意力机制(Squeeze-and-Excitation)"""
    def __init__(self, channels, reduction=16):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

class ResidualBlock(nn.Module):
    """简单残差块"""
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                              stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                              stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        # 确保残差连接的维度匹配
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, 
                         stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(residual)
        out = self.relu(out)
        return out

class AlexNetEnhanced(nn.Module):
    """增强版AlexNet，添加残差连接和注意力机制"""
    def __init__(self, num_classes=10, se_reduction=16, dropout_rate=0.5):
        super(AlexNetEnhanced, self).__init__()
        self.alexnet = models.alexnet(pretrained=True)
        
        # 冻结前10层参数（原始AlexNet共13层特征层，前10层对应前两组卷积）
        for param in list(self.alexnet.parameters())[:10]:  
            param.requires_grad = False
        
        # 提取原始特征层（共13层：0-12）
        original_features = list(self.alexnet.features)
        
        # ---------------------- 特征层分组优化 ----------------------
        # 第1组：输入3→64通道，输出尺寸: (64, 55, 55)
        group1 = original_features[:3]  # Conv2d + ReLU + MaxPool2d(kernel_size=3, stride=2)
        
        # 第2组：64→192通道，输出尺寸: (192, 27, 27)
        group2 = original_features[3:6]  # Conv2d + ReLU + MaxPool2d(kernel_size=3, stride=2)
        
        # 第3组：192→384通道，输出尺寸: (384, 13, 13)
        group3 = original_features[6:8]  # Conv2d + ReLU（原模型此处为两层Conv2d，需调整）
        
        # 第4组：384→256通道，输出尺寸: (256, 13, 13)
        group4 = original_features[8:10]  # Conv2d + ReLU
        
        # 第5组：256→256通道，输出尺寸: (256, 6, 6)
        group5 = original_features[10:13]  # Conv2d + ReLU + MaxPool2d(kernel_size=3, stride=2)
        
        # ---------------------- 添加增强模块 ----------------------
        enhanced_features = (
            group1 + 
            group2 + 
            [ResidualBlock(192, 192), SEModule(192, reduction=se_reduction)] +  # 第2组后添加
            group3 + 
            [ResidualBlock(384, 384), SEModule(384, reduction=se_reduction)] +  # 第3组后添加
            group4 + 
            [ResidualBlock(256, 256), SEModule(256, reduction=se_reduction)] +  # 第4组后添加
            group5
        )
        
        # 组合特征层
        self.alexnet.features = nn.Sequential(*enhanced_features)
        
        # ---------------------- 分类器优化 ----------------------
        self.alexnet.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 6 * 6, 2048),  # 减少全连接层参数
            nn.BatchNorm1d(2048),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(2048, num_classes)
        )
    
    def forward(self, x):
        # 调试时可取消注释，打印各层特征图尺寸
        # print("输入尺寸:", x.shape)
        # for layer in self.alexnet.features:
        #     x = layer(x)
        #     print(f"层输出尺寸:", x.shape)
        x = self.alexnet.features(x)
        x = torch.flatten(x, 1)
        x = self.alexnet.classifier(x)
        return x

In [57]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=10, save_dir='checkpoints'):
    """训练模型并定期保存检查点"""
    os.makedirs(save_dir, exist_ok=True)  # 创建保存目录
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels, _ in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        epoch_loss = running_loss / len(dataloader)
        epoch_acc = 100. * correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%')
        
        # 保存当前检查点
        checkpoint_path = os.path.join(save_dir, f'checkpoint_epoch_{epoch+1}.pth')
        torch.save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': epoch_loss,
            'accuracy': epoch_acc
        }, checkpoint_path)
        
        # 保存最佳模型
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_path = os.path.join(save_dir, 'best_model.pth')
            torch.save(model.state_dict(), best_model_path)
            print(f'Best model saved with accuracy: {best_acc:.2f}%')
    
    # 保存最终模型
    final_model_path = os.path.join(save_dir, 'final_model.pth')
    torch.save(model.state_dict(), final_model_path)
    print(f'Final model saved at: {final_model_path}')
    
    return model

In [58]:
# 评估模型并保存详细结果
def evaluate_and_save_results(model, dataloader, data_root='train'):
    output_csv=f'{data_root}/digital_cut.csv'
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    
    results = []
    
    with torch.no_grad():
        for inputs, true_digits, filenames in dataloader:
            inputs, true_digits = inputs.to(device), true_digits.to(device)
            outputs = model(inputs)
            _, predicted_digits = outputs.max(1)
            
            # 保存每个数字的预测结果和真实标签
            for filename, pred, true in zip(filenames, predicted_digits, true_digits):
                results.append({
                    'filename': filename,
                    '预测': int(pred),
                    '实际': int(true)
                })
    
    # 保存到CSV
    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"数字识别详细结果已保存到 {output_csv}")
    
    return results

In [59]:
# 从详细结果文件拼接完整电表读数
def combine_digits_from_csv(data_root='train'):
    # 读取详细结果
    input_csv=f'{data_root}/digital_cut.csv'
    output_csv=f'{data_root}/digital_compair.csv'
    df = pd.read_csv(input_csv)
    
    # 创建结果列表
    combined_results = []
    
    # 按基础文件名分组（例如：hefei_3188）
    for base_name, group in df.groupby(lambda x: '_'.join(df.iloc[x]['filename'].split('_')[:2])):
        # 确保有6个数字
        if len(group) != 6:
            print(f"警告: {base_name} 的数字数量不是6个，跳过")
            continue
        
        # 按数字位置排序（索引0-5）
        group = group.sort_values(by='filename', key=lambda x: x.str.split('_').str[-2].astype(int))
        
        # 提取预测数字
        predicted_digits = group['预测'].tolist()
        
        # 计算最终值（从右到左，每个位置乘以对应权重）
        weights = [0.1, 1, 10, 100, 1000, 10000]  # 对应索引0-5的权重
        final_value = sum(d * w for d, w in zip(predicted_digits, weights))
        
        # 构建原始文件名（例如：hefei_3188.jpg）
        original_filename = base_name + '.jpg'
        
        # 添加到结果
        combined_results.append({
            'filename': original_filename,
            'num': final_value
        })
    
    # 保存拼接结果
    result_df = pd.DataFrame(combined_results)
    result_df.to_csv(output_csv, index=False)
    print(f"拼接后的电表读数已保存到 {output_csv}")
    
    return combined_results

In [60]:

def train_auto(data_root='train', save_dir='models'):
    """主训练函数，添加模型保存目录参数"""
    # 创建主保存目录
    os.makedirs(save_dir, exist_ok=True)
    
    # 数据预处理
    train_dir = f'{data_root}/cut_num'
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    
    # 加载数据集
    dataset = MeterDigitDataset(root_dir=train_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # 初始化模型
    model = AlexNetEnhanced(num_classes=10)
    
    # 定义损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # 创建特定于此次训练的子目录
    import datetime
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    training_save_dir = os.path.join(save_dir, f'training_{timestamp}')
    os.makedirs(training_save_dir, exist_ok=True)
    
    # 训练模型并保存检查点
    print(f"开始训练模型，保存路径: {training_save_dir}")
    model = train_model(model, dataloader, criterion, optimizer, 
                        num_epochs=10, save_dir=training_save_dir)
    
    # 评估模型并保存详细结果（修改保存路径）
    print("评估模型并计算正确率...")
    results = evaluate_and_save_results(model, dataloader, data_root=training_save_dir)
    
    # 计算整体正确率
    if results:
        total_predictions = len(results)
        correct_predictions = sum(1 for item in results if item['预测'] == item['实际'])
        accuracy = (correct_predictions / total_predictions) * 100
        print(f"\n模型整体正确率: {accuracy:.2f}%")
    
    # 从详细结果拼接完整电表读数
    print("拼接完整电表读数...")
    combine_digits_from_csv()
    
    return model, training_save_dir  # 返回模型和保存路径

In [38]:
combine_digits_from_csv()

拼接后的电表读数已保存到 train/digital_compair.csv


[{'filename': 'hefei_3188.jpg.jpg', 'num': 9575.9},
 {'filename': 'hefei_3190.jpg.jpg', 'num': 19652.0},
 {'filename': 'hefei_3191.jpg.jpg', 'num': 2262.7},
 {'filename': 'hefei_3192.jpg.jpg', 'num': 5702.0},
 {'filename': 'hefei_3194.jpg.jpg', 'num': 2566.0},
 {'filename': 'hefei_3195.jpg.jpg', 'num': 7509.7},
 {'filename': 'hefei_3196.jpg.jpg', 'num': 272.3},
 {'filename': 'hefei_3198.jpg.jpg', 'num': 18304.5},
 {'filename': 'hefei_3199.jpg.jpg', 'num': 7903.2},
 {'filename': 'hefei_3200.jpg.jpg', 'num': 5981.5},
 {'filename': 'hefei_3203.jpg.jpg', 'num': 1921.0},
 {'filename': 'hefei_3204.jpg.jpg', 'num': 6763.5},
 {'filename': 'hefei_3205.jpg.jpg', 'num': 12545.8},
 {'filename': 'hefei_3208.jpg.jpg', 'num': 5598.5},
 {'filename': 'hefei_3209.jpg.jpg', 'num': 5546.1},
 {'filename': 'hefei_3210.jpg.jpg', 'num': 6402.1},
 {'filename': 'hefei_3212.jpg.jpg', 'num': 3467.4},
 {'filename': 'hefei_3213.jpg.jpg', 'num': 82.7},
 {'filename': 'hefei_3214.jpg.jpg', 'num': 11290.6},
 {'filename

In [None]:
# 训练模型并自动保存
model, save_path = train_auto(data_root='train', save_dir='models')

# 打印保存路径
print(f"模型已保存至: {save_path}")



开始训练模型，保存路径: models\training_20250527_161850
Epoch 1/10, Loss: 0.5282, Acc: 82.76%
Best model saved with accuracy: 82.76%
Epoch 2/10, Loss: 0.1319, Acc: 95.99%
Best model saved with accuracy: 95.99%
Epoch 3/10, Loss: 0.0778, Acc: 97.66%
Best model saved with accuracy: 97.66%
Epoch 4/10, Loss: 0.0718, Acc: 97.79%
Best model saved with accuracy: 97.79%
Epoch 5/10, Loss: 0.0689, Acc: 97.91%
Best model saved with accuracy: 97.91%
Epoch 6/10, Loss: 0.0401, Acc: 98.65%
Best model saved with accuracy: 98.65%
Epoch 7/10, Loss: 0.0378, Acc: 98.95%
Best model saved with accuracy: 98.95%
Epoch 8/10, Loss: 0.0309, Acc: 99.15%
Best model saved with accuracy: 99.15%
Epoch 9/10, Loss: 0.0320, Acc: 99.03%
Epoch 10/10, Loss: 0.0534, Acc: 98.23%
Final model saved at: models\training_20250527_161850\final_model.pth
评估模型并计算正确率...
数字识别详细结果已保存到 models\training_20250527_161850/digital_cut.csv

模型整体正确率: 99.76%
拼接完整电表读数...
拼接后的电表读数已保存到 train/digital_compair.csv
模型已保存至: models\training_20250527_161850




AlexNetEnhanced(
  (alexnet): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): ResidualBlock(
        (conv1): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential()
      )
      (7): SEModule(
        (avg_pool): AdaptiveAvgPool2

In [51]:
# 测试函数
def test_auto(data_root='test'):
    test_dir=f'{data_root}/cut_num'
    
    results = process_meter_images('test')
    
    print(f"处理完成，共处理{len(results)}张图像")
    # 数据预处理
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    
    # 加载数据集
    dataset = MeterDigitDataset(root_dir=test_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # 评估模型并保存详细结果
    print("评估模型并计算正确率...")
    results = evaluate_and_save_results(model, dataloader,data_root='test')
    
    # 计算整体正确率
    if results:
        total_predictions = len(results)
        correct_predictions = sum(1 for item in results if item['预测'] == item['实际'])
        accuracy = (correct_predictions / total_predictions) * 100
        print(f"\n模型整体正确率: {accuracy:.2f}%")
    
    # 从详细结果拼接完整电表读数
    print("拼接完整电表读数...")
    combine_digits_from_csv('test')

In [62]:
import pandas as pd

def calculate_accuracy(data_root='train'):
    # 读取原始数据和预测结果
    truth_csv=f'{data_root}/data.csv'
    pred_csv=f'{data_root}/digital_compair.csv'
    df_truth = pd.read_csv(truth_csv)
    df_pred = pd.read_csv(pred_csv)
    
    # 数据清洗：处理文件名后缀问题（如.hefei_3188.jpg.jpg）
    df_truth['filename'] = df_truth['filename'].str.replace('.jpg.jpg', '.jpg')  # 处理重复后缀
    df_pred['filename'] = df_pred['filename'].str.replace('.jpg.jpg', '.jpg')
    
    # 合并真实值和预测值
    merged_df = pd.merge(df_truth, df_pred, on='filename', how='inner')
    
    # 检查数据是否匹配
    if len(merged_df) == 0:
        print("警告：未找到匹配的文件")
        return 0.0
    
    # 计算绝对误差和正确率
    merged_df['truth_number'] = merged_df['number'].astype(float)
    merged_df['pred_number'] = merged_df['num'].astype(float)
    
    # 定义允许的误差范围（根据需求调整，此处以绝对误差<=0.5为例）
    merged_df['is_correct'] = np.abs(merged_df['truth_number'] - merged_df['pred_number']) <= 0.5
    
    # 计算整体正确率
    accuracy = merged_df['is_correct'].mean() * 100
    
    # 输出详细结果
    print("数据匹配结果：")
    print(merged_df[['filename', 'truth_number', 'pred_number', 'is_correct']])
    print(f"\n整体正确率：{accuracy:.2f}%")
    
    return accuracy

# 运行示例
if __name__ == "__main__":
    calculate_accuracy()

数据匹配结果：
           filename  truth_number  pred_number  is_correct
0    hefei_3191.jpg        2262.7       2262.7        True
1    hefei_3195.jpg        7509.7       7509.7        True
2    hefei_3198.jpg       18304.5      18304.5        True
3    hefei_3203.jpg        1923.0       1912.0       False
4    hefei_3221.jpg        4135.3       4135.3        True
..              ...           ...          ...         ...
162  hefei_4479.jpg        6109.1       6109.1        True
163  hefei_4496.jpg        2407.1       2407.1        True
164  hefei_4497.jpg        2681.7       2681.7        True
165  hefei_4515.jpg        3230.7       3230.7        True
166  hefei_4516.jpg        2961.3       2961.4        True

[167 rows x 4 columns]

整体正确率：92.81%


In [53]:
def convert_filename_to_id(data_root='train'):
    """
    将CSV文件中的filename列转换为纯数字ID列
    
    参数:
    - csv_path: 输入CSV文件路径（包含filename列）
    - output_path: 输出CSV文件路径（默认为原文件添加'_converted'后缀）
    
    返回:
    - 处理后的DataFrame
    """
    csv_path = f'{data_root}/digital_compair.csv'
    output_path=f'{data_root}/digital_out.csv'
    # 读取CSV文件
    df = pd.read_csv(csv_path)
    
    # 确保存在filename列
    if 'filename' not in df.columns:
        raise ValueError(f"CSV文件中未找到'filename'列: {csv_path}")
    
    # 从filename提取数字ID（处理可能的.jpg.jpg后缀）
    df['ID'] = df['filename'].apply(lambda x: 
        x.split('_')[-1]           # 提取最后一部分（如3188.jpg.jpg）
        .replace('.jpg', '')      # 移除.jpg后缀
        .split('.')[0]            # 处理可能的剩余后缀（如3188.jpg.jpg → 3188）
    )
    
    # 重命名列并调整顺序
    if 'num' in df.columns:
        df = df.rename(columns={'num': 'number'})  # 若存在num列，重命名为number
    
    # 确保输出包含ID和number列
    if not all(col in df.columns for col in ['ID', 'number']):
        raise ValueError("CSV文件中缺少必要的列（filename和num/number）")
    
    # 保存结果
    if output_path is None:
        # 默认在原文件名后添加_converted
        output_path = csv_path.replace('.csv', '_converted.csv')
    
    df[['ID', 'number']].to_csv(output_path, index=False)
    print(f"已将filename转换为ID并保存至: {output_path}")
    
    return df

In [54]:
convert_filename_to_id()

已将filename转换为ID并保存至: train/digital_out.csv


Unnamed: 0,filename,number,ID
0,hefei_3191.jpg.jpg,2262.7,3191
1,hefei_3195.jpg.jpg,7509.7,3195
2,hefei_3198.jpg.jpg,18304.5,3198
3,hefei_3203.jpg.jpg,1912.0,3203
4,hefei_3221.jpg.jpg,4135.3,3221
...,...,...,...
162,hefei_4479.jpg.jpg,6109.1,4479
163,hefei_4496.jpg.jpg,2407.1,4496
164,hefei_4497.jpg.jpg,2681.7,4497
165,hefei_4515.jpg.jpg,3230.7,4515


In [64]:
# 测试模块,加载训练好的模型

# 后续使用时加载模型
model = AlexNetEnhanced(num_classes=10)
model.load_state_dict(torch.load(os.path.join(save_path, 'best_model.pth')))
model.eval()  # 设置为评估模式

print(save_path)

test_auto()
convert_filename_to_id('test')



models\training_20250527_161850
成功处理: hefei_3191.jpg -> 022627
成功处理: hefei_3195.jpg -> 075097
成功处理: hefei_3198.jpg -> 183045
成功处理: hefei_3203.jpg -> 019230
成功处理: hefei_3221.jpg -> 041353
成功处理: hefei_3229.jpg -> 030739
成功处理: hefei_3230.jpg -> 126352
成功处理: hefei_3232.jpg -> 050302
成功处理: hefei_3242.jpg -> 063963
成功处理: hefei_3256.jpg -> 023339
成功处理: hefei_3262.jpg -> 072279
成功处理: hefei_3274.jpg -> 063205
成功处理: hefei_3277.jpg -> 128458
成功处理: hefei_3278.jpg -> 061333
成功处理: hefei_3280.jpg -> 032190
成功处理: hefei_3288.jpg -> 077486
成功处理: hefei_3293.jpg -> 018402
成功处理: hefei_3294.jpg -> 099083
成功处理: hefei_3296.jpg -> 037952
成功处理: hefei_3300.jpg -> 100138
成功处理: hefei_3304.jpg -> 031438
成功处理: hefei_3306.jpg -> 066856
成功处理: hefei_3320.jpg -> 150017
成功处理: hefei_3321.jpg -> 031565
成功处理: hefei_3326.jpg -> 035753
成功处理: hefei_3337.jpg -> 042082
成功处理: hefei_3338.jpg -> 101854
成功处理: hefei_3349.jpg -> 028278
成功处理: hefei_3352.jpg -> 052763
成功处理: hefei_3374.jpg -> 083449
成功处理: hefei_3377.jpg -> 011703
成功处理: h

Unnamed: 0,filename,number,ID
0,hefei_3191.jpg.jpg,2262.7,3191
1,hefei_3195.jpg.jpg,7509.7,3195
2,hefei_3198.jpg.jpg,18304.5,3198
3,hefei_3203.jpg.jpg,1923.0,3203
4,hefei_3221.jpg.jpg,4135.3,3221
...,...,...,...
162,hefei_4479.jpg.jpg,6109.1,4479
163,hefei_4496.jpg.jpg,2407.1,4496
164,hefei_4497.jpg.jpg,2681.7,4497
165,hefei_4515.jpg.jpg,3230.7,4515


### AlexNet模型结构详解

AlexNet是2012年ImageNet竞赛冠军模型，由Alex Krizhevsky等人提出。它是深度卷积神经网络在计算机视觉领域的里程碑，首次证明了深度网络在大规模图像分类任务上的有效性。

以下是我针对电表数字识别任务修改后的AlexNet模型结构及关键组件说明：

### 1. 整体架构概述

我的实现基于预训练的AlexNet模型，保留了原始的卷积特征提取部分，仅修改了最后的全连接分类器以适应0-9数字识别任务：

```python
class AlexNetModified(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNetModified, self).__init__()
        self.alexnet = models.alexnet(pretrained=True)
        
        # 冻结大部分预训练层
        for param in list(self.alexnet.parameters())[:-5]:
            param.requires_grad = False
        
        # 修改分类器
        self.alexnet.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
    
    def forward(self, x):
        return self.alexnet(x)
```

### 2. 卷积层与池化层配置

AlexNet的卷积部分包含5个卷积层和3个最大池化层，具体配置如下：

```
输入图像: 3通道, 224x224像素

1. 卷积层1 (Conv1):
   - 卷积核数量: 64
   - 卷积核大小: 11x11
   - 步长: 4
   - 填充: 2
   - 激活函数: ReLU
   - 输出尺寸: (64, 55, 55)
   
2. 最大池化层1 (MaxPool1):
   - 池化核大小: 3x3
   - 步长: 2
   - 输出尺寸: (64, 27, 27)
   
3. 卷积层2 (Conv2):
   - 卷积核数量: 192
   - 卷积核大小: 5x5
   - 步长: 1
   - 填充: 2
   - 激活函数: ReLU
   - 输出尺寸: (192, 27, 27)
   
4. 最大池化层2 (MaxPool2):
   - 池化核大小: 3x3
   - 步长: 2
   - 输出尺寸: (192, 13, 13)
   
5. 卷积层3 (Conv3):
   - 卷积核数量: 384
   - 卷积核大小: 3x3
   - 步长: 1
   - 填充: 1
   - 激活函数: ReLU
   - 输出尺寸: (384, 13, 13)
   
6. 卷积层4 (Conv4):
   - 卷积核数量: 256
   - 卷积核大小: 3x3
   - 步长: 1
   - 填充: 1
   - 激活函数: ReLU
   - 输出尺寸: (256, 13, 13)
   
7. 卷积层5 (Conv5):
   - 卷积核数量: 256
   - 卷积核大小: 3x3
   - 步长: 1
   - 填充: 1
   - 激活函数: ReLU
   - 输出尺寸: (256, 13, 13)
   
8. 最大池化层3 (MaxPool3):
   - 池化核大小: 3x3
   - 步长: 2
   - 输出尺寸: (256, 6, 6)
```

### 3. 全连接层配置

原始AlexNet的全连接层包含3个层，我针对数字识别任务进行了修改：

```
1. 扁平化操作:
   - 将卷积输出(256, 6, 6)展平为一维向量: 256 * 6 * 6 = 9216

2. 全连接层1 (FC1):
   - 输入特征: 9216
   - 输出特征: 4096
   - 激活函数: ReLU
   - Dropout: 0.5 (训练时随机丢弃50%的神经元)
   
3. 全连接层2 (FC2):
   - 输入特征: 4096
   - 输出特征: 4096
   - 激活函数: ReLU
   - Dropout: 0.5
   
4. 输出层 (Output):
   - 输入特征: 4096
   - 输出特征: 10 (对应数字0-9)
   - 无激活函数(原始输出，后续由Softmax处理)
```

### 4. 损失函数与优化器

在训练过程中，我使用了以下配置：

```python
# 损失函数: 交叉熵损失
criterion = nn.CrossEntropyLoss()

# 优化器: 随机梯度下降(SGD)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
```

**交叉熵损失函数**(CrossEntropyLoss)的数学表达式：
```
L = -1/N * Σ(y_i * log(p_i))
```
其中：
- N是批量大小
- y_i是真实标签的one-hot编码
- p_i是模型预测的概率分布
- log是自然对数

这个损失函数结合了Softmax激活和负对数似然损失，特别适合多分类问题。它会惩罚模型对正确类别的低置信度预测。

### 5. 设计思路与优化策略

1. **预训练模型利用**：
   - 使用ImageNet预训练权重初始化模型，大幅提高小数据集上的性能
   - 冻结前几层卷积层(保留通用特征)，只训练后几层适应特定任务

2. **正则化处理**：
   - 在全连接层使用Dropout(0.5)减少过拟合
   - 输入图像进行标准化处理，提高模型稳定性

3. **计算效率优化**：
   - 冻结大部分预训练层参数，只需训练约5%的模型参数
   - 输入图像尺寸为224x224，平衡精度和计算速度

4. **输出层设计**：
   - 最后一层全连接输出10个类别，对应数字0-9
   - 使用交叉熵损失函数，天然适合多分类任务

这个网络设计充分利用了AlexNet的强大特征提取能力，同时针对电表数字识别任务进行了优化，能够高效准确地识别切割后的数字图像。