In [5]:
# TensorFlow and tf.keras
import json
import random
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow_datasets as tfds

In [6]:
(ds_train, ds_test), ds_info = tfds.load(
    'stanford_dogs',
    split=['train', 'test'],
    as_supervised=True,
    shuffle_files=True,
    with_info=True,
)

In [7]:
ds_info.features["label"].num_classes

120

In [8]:
class_names = ds_info.features["label"].names

In [9]:
len(class_names)

120

In [10]:
class_names

['n02085620-chihuahua',
 'n02085782-japanese_spaniel',
 'n02085936-maltese_dog',
 'n02086079-pekinese',
 'n02086240-shih-tzu',
 'n02086646-blenheim_spaniel',
 'n02086910-papillon',
 'n02087046-toy_terrier',
 'n02087394-rhodesian_ridgeback',
 'n02088094-afghan_hound',
 'n02088238-basset',
 'n02088364-beagle',
 'n02088466-bloodhound',
 'n02088632-bluetick',
 'n02089078-black-and-tan_coonhound',
 'n02089867-walker_hound',
 'n02089973-english_foxhound',
 'n02090379-redbone',
 'n02090622-borzoi',
 'n02090721-irish_wolfhound',
 'n02091032-italian_greyhound',
 'n02091134-whippet',
 'n02091244-ibizan_hound',
 'n02091467-norwegian_elkhound',
 'n02091635-otterhound',
 'n02091831-saluki',
 'n02092002-scottish_deerhound',
 'n02092339-weimaraner',
 'n02093256-staffordshire_bullterrier',
 'n02093428-american_staffordshire_terrier',
 'n02093647-bedlington_terrier',
 'n02093754-border_terrier',
 'n02093859-kerry_blue_terrier',
 'n02093991-irish_terrier',
 'n02094114-norfolk_terrier',
 'n02094258-norwi

In [11]:
clean_class_names = [item.split('-', 1)[1] for item in class_names]

print(clean_class_names)

['chihuahua', 'japanese_spaniel', 'maltese_dog', 'pekinese', 'shih-tzu', 'blenheim_spaniel', 'papillon', 'toy_terrier', 'rhodesian_ridgeback', 'afghan_hound', 'basset', 'beagle', 'bloodhound', 'bluetick', 'black-and-tan_coonhound', 'walker_hound', 'english_foxhound', 'redbone', 'borzoi', 'irish_wolfhound', 'italian_greyhound', 'whippet', 'ibizan_hound', 'norwegian_elkhound', 'otterhound', 'saluki', 'scottish_deerhound', 'weimaraner', 'staffordshire_bullterrier', 'american_staffordshire_terrier', 'bedlington_terrier', 'border_terrier', 'kerry_blue_terrier', 'irish_terrier', 'norfolk_terrier', 'norwich_terrier', 'yorkshire_terrier', 'wire-haired_fox_terrier', 'lakeland_terrier', 'sealyham_terrier', 'airedale', 'cairn', 'australian_terrier', 'dandie_dinmont', 'boston_bull', 'miniature_schnauzer', 'giant_schnauzer', 'standard_schnauzer', 'scotch_terrier', 'tibetan_terrier', 'silky_terrier', 'soft-coated_wheaten_terrier', 'west_highland_white_terrier', 'lhasa', 'flat-coated_retriever', 'cur

In [12]:
import zipfile

In [13]:
#zip_file = zipfile.ZipFile('/content/dog-breed-identification.zip')
#zip_file.extractall('/content/sample_data/')

In [14]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


import warnings
warnings.filterwarnings('ignore')

In [15]:
train_dir = '/content/sample_data/dog-breed-identification/train'

In [16]:
train_size = len(os.listdir('/content/sample_data/dog-breed-identification/train'))

train_size

10222

In [17]:
df = pd.read_csv('../content/sample_data/dog-breed-identification/labels.csv')

In [18]:
dog_breeds = sorted(df['breed'].unique())
n_classes = len(dog_breeds)
print(n_classes)
dog_breeds

120


['affenpinscher',
 'afghan_hound',
 'african_hunting_dog',
 'airedale',
 'american_staffordshire_terrier',
 'appenzeller',
 'australian_terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'black-and-tan_coonhound',
 'blenheim_spaniel',
 'bloodhound',
 'bluetick',
 'border_collie',
 'border_terrier',
 'borzoi',
 'boston_bull',
 'bouvier_des_flandres',
 'boxer',
 'brabancon_griffon',
 'briard',
 'brittany_spaniel',
 'bull_mastiff',
 'cairn',
 'cardigan',
 'chesapeake_bay_retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker_spaniel',
 'collie',
 'curly-coated_retriever',
 'dandie_dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english_foxhound',
 'english_setter',
 'english_springer',
 'entlebucher',
 'eskimo_dog',
 'flat-coated_retriever',
 'french_bulldog',
 'german_shepherd',
 'german_short-haired_pointer',
 'giant_schnauzer',
 'golden_retriever',
 'gordon_setter',
 'great_dane',
 'great_pyrenees',
 'greater_swiss_mountain_dog',
 'groenendael',


In [19]:
count_num = 0

for breed in dog_breeds:
  for check_name in clean_class_names:
    if breed == check_name:
      count_num += 1

In [20]:
count_num

120

In [21]:
return_breeds = []
check_num = 0

for one_data in df['breed']:
  for check_name, change_name in zip(clean_class_names, class_names):
    if one_data == check_name:
      return_breeds.append(change_name)
      check_num += 1

In [22]:
check_num

10222

In [23]:
return_breeds

['n02096585-boston_bull',
 'n02115641-dingo',
 'n02086079-pekinese',
 'n02088632-bluetick',
 'n02099601-golden_retriever',
 'n02093647-bedlington_terrier',
 'n02093647-bedlington_terrier',
 'n02090622-borzoi',
 'n02110806-basenji',
 'n02092002-scottish_deerhound',
 'n02105855-shetland_sheepdog',
 'n02089867-walker_hound',
 'n02085936-maltese_dog',
 'n02088632-bluetick',
 'n02094114-norfolk_terrier',
 'n02116738-african_hunting_dog',
 'n02095314-wire-haired_fox_terrier',
 'n02090379-redbone',
 'n02095570-lakeland_terrier',
 'n02108089-boxer',
 'n02107142-doberman',
 'n02091635-otterhound',
 'n02091635-otterhound',
 'n02093647-bedlington_terrier',
 'n02115641-dingo',
 'n02099601-golden_retriever',
 'n02097209-standard_schnauzer',
 'n02102973-irish_water_spaniel',
 'n02089078-black-and-tan_coonhound',
 'n02096177-cairn',
 'n02110627-affenpinscher',
 'n02099712-labrador_retriever',
 'n02091244-ibizan_hound',
 'n02100735-english_setter',
 'n02089867-walker_hound',
 'n02092339-weimaraner',
 

In [24]:
df

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,borzoi
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher


In [25]:
dataf = pd.DataFrame()

dataf['id'] = df['id']

In [26]:
dataf['breed'] = return_breeds

In [27]:
dataf

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,n02096585-boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,n02115641-dingo
2,001cdf01b096e06d78e9e5112d419397,n02086079-pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,n02088632-bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,n02099601-golden_retriever
...,...,...
10217,ffd25009d635cfd16e793503ac5edef0,n02090622-borzoi
10218,ffd3f636f7f379c51ba3648a9ff8254f,n02096437-dandie_dinmont
10219,ffe2ca6c940cddfee68fa3cc6c63213f,n02096051-airedale
10220,ffe5f6d8e2bff356e9482a80a6e29aac,n02107312-miniature_pinscher


In [28]:
import os
import shutil

#source_dir = '/content/sample_data/test/'
#destination_dir = '/content/sample_data/train/'

#files = os.listdir(source_dir)

#for file_name in files:
    #source_path = os.path.join(source_dir, file_name)
    #destination_path = os.path.join(destination_dir, file_name)

    #if os.path.isfile(source_path):
        #shutil.move(source_path, destination_path)
        #print(f"Moved: {file_name}")

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Moved: 0a859b76a52d4fac456653a1d45ddf3d.jpg
Moved: 87e67ad3d0620a61ea7cc95a3e17f123.jpg
Moved: c0e7dbddbcb07bd8609e96a564610aca.jpg
Moved: cb65f7bf5f3487c56536d83102895999.jpg
Moved: b2fb4005c7ca845beaadfc8063cd45a1.jpg
Moved: 5d9dbd6fb2bd2df81443509a0e60a7c3.jpg
Moved: 955151be94bfd94a8aef282e15b0e819.jpg
Moved: 6371f236cb3596b625bec5b72ff63cbf.jpg
Moved: f0341ba6b21465ece32d7df0ebff3048.jpg
Moved: 314f2892b55140bdc3fc3695cf85b9a3.jpg
Moved: 7d77bc154a960b927abd85a56d22616d.jpg
Moved: 117ea9f58dbe47140f9f1c3297b0a4b1.jpg
Moved: 2c93a523f3c975aaf10aa85bea49336e.jpg
Moved: 5c05f1895a7caeebc627e0a85d7df32a.jpg
Moved: 19905a98817ec6df4765ad5713558a76.jpg
Moved: 4a692ed4c770d44d6ad825b7f43b31ca.jpg
Moved: d96f32fa517b8627b63d8dec5351f102.jpg
Moved: ad57aa30eb969a3a70acc9504a73a82c.jpg
Moved: 7c8c5965eccd3830d40cbe600ba1f12a.jpg
Moved: 84072d2142d43e27decbad9736141bb0.jpg
Moved: 4e79457d26002b5161324dc257bdca6b.jpg
Moved: 3e29d30cb4344b152bd

In [29]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from keras.preprocessing.image import load_img
from keras.utils import to_categorical

In [30]:
def serialize_example(image, label):
    image_raw = tf.io.serialize_tensor(image).numpy()
    feature = {
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_raw])),
        'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[label.encode()])),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example.SerializeToString()

In [34]:
batch_size = 100
num_batches = 100
image_size = (224, 224, 3)
tfrecord_file = "dataset.tfrecord"
image_dir = '/content/sample_data/all_data/'

In [36]:
image_ids = dataf['id']

with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for batch_idx in range(num_batches):
        print(f"Processing batch {batch_idx + 1}/{num_batches}")
        start_idx = batch_idx * batch_size
        end_idx = min((batch_idx + 1) * batch_size, len(image_ids))

        for img_id, label in zip(image_ids[start_idx:end_idx], dataf['breed'][start_idx:end_idx]):
            img_dir = image_dir + img_id + '.jpg'
            img = load_img(img_dir, target_size=image_size)
            img_array = img_to_array(img)
            serialized = serialize_example(img_array, label)
            writer.write(serialized)

Processing batch 1/100
Processing batch 2/100
Processing batch 3/100
Processing batch 4/100
Processing batch 5/100
Processing batch 6/100
Processing batch 7/100
Processing batch 8/100
Processing batch 9/100
Processing batch 10/100
Processing batch 11/100
Processing batch 12/100
Processing batch 13/100
Processing batch 14/100
Processing batch 15/100
Processing batch 16/100
Processing batch 17/100
Processing batch 18/100
Processing batch 19/100
Processing batch 20/100
Processing batch 21/100
Processing batch 22/100
Processing batch 23/100
Processing batch 24/100
Processing batch 25/100
Processing batch 26/100
Processing batch 27/100
Processing batch 28/100
Processing batch 29/100
Processing batch 30/100
Processing batch 31/100
Processing batch 32/100
Processing batch 33/100
Processing batch 34/100
Processing batch 35/100
Processing batch 36/100
Processing batch 37/100
Processing batch 38/100
Processing batch 39/100
Processing batch 40/100
Processing batch 41/100
Processing batch 42/100
P