In [152]:
import os.path

import pandas as pd
import torch.utils.data
path="./data/dog_identification"
labels=pd.read_csv(path+"/labels.csv")
labels[:10]

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


In [153]:
breeds=labels.breed.unique()
breeds

array(['boston_bull', 'dingo', 'pekinese', 'bluetick', 'golden_retriever',
       'bedlington_terrier', 'borzoi', 'basenji', 'scottish_deerhound',
       'shetland_sheepdog', 'walker_hound', 'maltese_dog',
       'norfolk_terrier', 'african_hunting_dog',
       'wire-haired_fox_terrier', 'redbone', 'lakeland_terrier', 'boxer',
       'doberman', 'otterhound', 'standard_schnauzer',
       'irish_water_spaniel', 'black-and-tan_coonhound', 'cairn',
       'affenpinscher', 'labrador_retriever', 'ibizan_hound',
       'english_setter', 'weimaraner', 'giant_schnauzer', 'groenendael',
       'dhole', 'toy_poodle', 'border_terrier', 'tibetan_terrier',
       'norwegian_elkhound', 'shih-tzu', 'irish_terrier', 'kuvasz',
       'german_shepherd', 'greater_swiss_mountain_dog', 'basset',
       'australian_terrier', 'schipperke', 'rhodesian_ridgeback',
       'irish_setter', 'appenzeller', 'bloodhound', 'samoyed',
       'miniature_schnauzer', 'brittany_spaniel', 'kelpie', 'papillon',
       'borde

In [154]:
breed2idx=dict((breed, idx) for idx,breed in enumerate(breeds))
idx2breed=dict((idx,breed) for idx,breed in enumerate(breeds))
breed2idx

{'boston_bull': 0,
 'dingo': 1,
 'pekinese': 2,
 'bluetick': 3,
 'golden_retriever': 4,
 'bedlington_terrier': 5,
 'borzoi': 6,
 'basenji': 7,
 'scottish_deerhound': 8,
 'shetland_sheepdog': 9,
 'walker_hound': 10,
 'maltese_dog': 11,
 'norfolk_terrier': 12,
 'african_hunting_dog': 13,
 'wire-haired_fox_terrier': 14,
 'redbone': 15,
 'lakeland_terrier': 16,
 'boxer': 17,
 'doberman': 18,
 'otterhound': 19,
 'standard_schnauzer': 20,
 'irish_water_spaniel': 21,
 'black-and-tan_coonhound': 22,
 'cairn': 23,
 'affenpinscher': 24,
 'labrador_retriever': 25,
 'ibizan_hound': 26,
 'english_setter': 27,
 'weimaraner': 28,
 'giant_schnauzer': 29,
 'groenendael': 30,
 'dhole': 31,
 'toy_poodle': 32,
 'border_terrier': 33,
 'tibetan_terrier': 34,
 'norwegian_elkhound': 35,
 'shih-tzu': 36,
 'irish_terrier': 37,
 'kuvasz': 38,
 'german_shepherd': 39,
 'greater_swiss_mountain_dog': 40,
 'basset': 41,
 'australian_terrier': 42,
 'schipperke': 43,
 'rhodesian_ridgeback': 44,
 'irish_setter': 45,
 'a

In [155]:
labels['label_idx']=[breed2idx[b] for b in labels.breed]
labels

Unnamed: 0,id,breed,label_idx
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,0
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,1
2,001cdf01b096e06d78e9e5112d419397,pekinese,2
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,3
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,4
...,...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,borzoi,6
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont,93
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale,63
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher,77


In [156]:
from PIL import Image

class dataset(torch.utils.data.Dataset):
    def __init__(self,labels_df,img_path,transform=None):
        self.labels_df=labels_df
        self.img_path=img_path
        self.transform=transform
    def __len__(self):
        return len(self.labels_df)
    def __getitem__(self, idx):
        image_name=os.path.join(self.img_path,self.labels_df.id[idx]+'.jpg')
        img=Image.open(image_name)
        label=self.labels_df.label_idx[idx]
        if self.transform:
            img=self.transform(img)
        return img,label

In [157]:
IMG_SIZE=224
BATCH_SIZE=32
IMG_MEAN=[0.485,0.456,0.406]
IMG_STD=[0.229,0.224,0.225]
CUDA=torch.cuda.is_available()
DEVICE=torch.device('cuda' if CUDA else "cpu")

In [158]:
from torchvision import transforms

train_transformers=transforms.Compose(
    [
    transforms.Resize(IMG_SIZE),
    transforms.RandomResizedCrop(IMG_SIZE,scale=(0.95, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.Normalize(IMG_MEAN, IMG_STD),
    transforms.CenterCrop(IMG_SIZE)
    ]
)
val_transformers=transforms.Compose(
    [
    transforms.RandomResizedCrop(IMG_SIZE,scale=(0.95, 1.0)),
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMG_MEAN, IMG_STD)
    ]
)

In [159]:
from sklearn.model_selection import StratifiedShuffleSplit
dataset_names=['train','valid']
stratified_split=StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
train_split_idx,val_split_idx=next(stratified_split.split(labels.id,labels.breed))
train_df = labels.iloc[train_split_idx].reset_index()
val_df = labels.iloc[val_split_idx].reset_index()

In [25]:
val_df

Unnamed: 0,index,id,breed,label_idx
0,8028,c8caed58369e7cbd1af1d57ed8499220,soft-coated_wheaten_terrier,85
1,5557,8c547b5df1aff986fa67f9efc97459b9,leonberg,64
2,8117,caf49640a8436f3ed39c56b1c5e447db,borzoi,6
3,644,0fbfe941d913c3976a13cc4925e67389,rhodesian_ridgeback,44
4,9935,f9a2d5a36d32c2c1285cf46fe05972cb,cocker_spaniel,118
...,...,...,...,...
1018,2315,39c09b7ecfddca047dba2f8b088d1b57,irish_water_spaniel,21
1019,693,10f7757fdc673e159e47ea20834ba551,curly-coated_retriever,106
1020,1727,2ad417a1ddd2d35434e128db0a66b5ef,sealyham_terrier,100
1021,7258,b5d64452ea01f52960c0a7d0966fa736,leonberg,64


In [160]:
from torch.utils.data import DataLoader

image_transforms = {'train':train_transformers, 'valid':val_transformers}
train_dataset=dataset(train_df,os.path.join(path+"/train"), transform=image_transforms['train'])
valid_dataset=dataset(val_df,os.path.join(path+"/train"), transform=image_transforms['valid'])
image_dataset = {'train':train_dataset, 'valid':valid_dataset}
image_dataloader = {x:DataLoader(image_dataset[x],batch_size=BATCH_SIZE,shuffle=True,num_workers=0) for x in dataset_names}
dataset_sizes = {x:len(image_dataset[x]) for x in dataset_names}
image_dataloader['valid']

<torch.utils.data.dataloader.DataLoader at 0x1c92452a830>

In [161]:
import torchvision
model_ft=torchvision.models.resnet50(pretrained=True)
for pagram in model_ft.parameters():
    pagram.requires_grad=False
print(model_ft.fc)
num_fc_ftr = model_ft.fc.in_features #获取到fc层的输入
model_ft.fc = torch.nn.Linear(num_fc_ftr, len(breeds)) # 定义一个新的FC层
model_ft=model_ft.to(DEVICE)# 放到设备中
print(model_ft)



Linear(in_features=2048, out_features=1000, bias=True)
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
     

In [162]:
criterion=torch.nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(
    [
        {'params':model_ft.fc.parameters(),
         }
    ],lr=0.001
)

In [163]:
def train(model,device,train_loader,epoch):
    model.train()
    for batch_idx,data in enumerate(train_loader):
        x,y=data
        x=x.to(device)
        y=y.to(device)
        optimizer.zero_grad()
        y_hat=model(x)
        loss=criterion(y_hat,y)
        loss.backward()
        optimizer.step()
    print ('Train Epoch: {}\t Loss: {:.6f}'.format(epoch,loss.item()))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i,data in enumerate(test_loader):
            x,y=data
            x=x.to(device)
            y=y.to(device)
            optimizer.zero_grad()
            y_hat=model(x)
            test_loss+=criterion(y_hat,y).item()
            pred=y_hat.max(1,keepdim=True)[1]
            correct+=pred.eq(y.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(valid_dataset),
        100. * correct / len(valid_dataset)))


In [164]:
for epoch in range(1, 15):
    %time train(model=model_ft,device=DEVICE, train_loader=image_dataloader["train"],epoch=epoch)
    test(model=model_ft, device=DEVICE, test_loader=image_dataloader["valid"])

Train Epoch: 1	 Loss: 1.549677
CPU times: total: 2min 18s
Wall time: 1min 6s

Test set: Average loss: 0.0290, Accuracy: 753/1023 (74%)

Train Epoch: 2	 Loss: 0.802747
CPU times: total: 2min 27s
Wall time: 1min 11s

Test set: Average loss: 0.0245, Accuracy: 776/1023 (76%)

Train Epoch: 3	 Loss: 0.706229
CPU times: total: 2min 32s
Wall time: 1min 12s

Test set: Average loss: 0.0267, Accuracy: 766/1023 (75%)

Train Epoch: 4	 Loss: 0.786957
CPU times: total: 2min 34s
Wall time: 1min 11s

Test set: Average loss: 0.0236, Accuracy: 802/1023 (78%)

Train Epoch: 5	 Loss: 0.612978
CPU times: total: 2min 36s
Wall time: 1min 11s

Test set: Average loss: 0.0251, Accuracy: 787/1023 (77%)

Train Epoch: 6	 Loss: 1.084123
CPU times: total: 2min 35s
Wall time: 1min 14s

Test set: Average loss: 0.0257, Accuracy: 778/1023 (76%)

Train Epoch: 7	 Loss: 1.057132
CPU times: total: 2min 17s
Wall time: 1min 14s

Test set: Average loss: 0.0247, Accuracy: 784/1023 (77%)

Train Epoch: 8	 Loss: 1.115440
CPU times: 

In [165]:
sample=pd.read_csv(path+"./sample_submission.csv")
sample.head()

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [166]:
class testset(torch.utils.data.Dataset):
    def __init__(self,img_name,img_path,transform=None):
        self.img_path=img_path
        self.img_name=img_name
        self.transform=transform
    def __len__(self):
        return 10357
    def __getitem__(self, idx):
        image_name=os.path.join(self.img_path,self.img_name.id[idx]+'.jpg')
        img=Image.open(image_name)
        if self.transform:
            img=self.transform(img)
        return img

In [167]:
testData=DataLoader(testset(sample,os.path.join(path+"/test"), transform=image_transforms['valid']),batch_size=BATCH_SIZE,shuffle=True,num_workers=0)
testData

<torch.utils.data.dataloader.DataLoader at 0x1c924a55ab0>

In [168]:
labels=[]
with torch.no_grad():
    for batch_idx, data in enumerate(testData):
        x= data
        x=x.to(DEVICE)
        y_hat = model_ft(x)
        m = torch.nn.Softmax(dim=1)
        res = m(y_hat)
        labels.append(torch.Tensor.cpu(res).numpy())
labels

[array([[2.1836637e-11, 8.9903081e-09, 1.6758731e-08, ..., 7.1554189e-09,
         2.5340547e-08, 1.4793858e-10],
        [7.1699151e-07, 6.3321822e-06, 7.6095294e-03, ..., 7.7358400e-04,
         4.0944546e-04, 1.2584499e-07],
        [5.0437370e-07, 2.9715542e-05, 4.0523154e-11, ..., 3.1924722e-07,
         2.9996261e-08, 1.2935993e-09],
        ...,
        [7.3610763e-06, 2.7349453e-05, 9.4721024e-04, ..., 4.5593919e-05,
         1.1610545e-03, 2.7885231e-01],
        [6.1552337e-06, 7.7779032e-04, 1.2569693e-10, ..., 4.8173049e-05,
         7.5299971e-07, 8.2360472e-08],
        [1.0905976e-06, 5.8915223e-08, 2.2004255e-05, ..., 6.2259405e-06,
         5.0159979e-06, 7.1500364e-07]], dtype=float32),
 array([[5.4680322e-06, 8.9003535e-08, 7.3011574e-06, ..., 9.1486305e-01,
         5.2850586e-03, 3.0453739e-07],
        [1.1318770e-04, 3.0430415e-06, 5.0688006e-05, ..., 1.6113350e-03,
         1.0114999e-03, 3.0147451e-06],
        [1.0882717e-06, 1.9169889e-12, 1.3506477e-07, ...,

In [169]:
pdd=[]
for i in labels:
    for j in i:
       pdd.append(j)

In [170]:
res=pd.DataFrame(pdd)

In [171]:
d=pd.read_csv(path+"./sample_submission.csv")

In [172]:
for i in range(120):
    d[breeds[i]]=res.iloc[:,i]
d.to_csv("./sample1.csv")