In [None]:
import requests
import os
from pathlib import Path
from PIL import Image
from io import BytesIO
import time
import random

In [None]:
### Step 1: Scrape images for training the model (manually remove noisy images afterwards to improve classification accuracy)

In [None]:
# Scrape the positive ("true") examples: posts carrying the hatsune_miku tag.
save_dir = Path("/miku_classifier/data/true")
save_dir.mkdir(parents=True, exist_ok=True)

tag = "hatsune_miku"
limit = 100
total_images = 1000
total_pages = total_images // limit
# Seconds to wait on any single HTTP call; without a timeout a stalled
# connection would block the cell forever.
request_timeout = 30

base_url = "https://danbooru.donmai.us/posts.json"

# Download
for page in range(1, total_pages + 1):
    params = {
        'tags': tag,
        'limit': limit,
        'page': page
    }
    try:
        response = requests.get(base_url, params=params, timeout=request_timeout)
        # Surface HTTP errors instead of treating an error payload as a post list.
        response.raise_for_status()
        posts = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Failed to fetch page {page}: {e}")
        time.sleep(1)
        continue

    if not posts:
        print("No more images found.")
        break

    for i, post in enumerate(posts):
        # Posts without a 'file_url' key have nothing to download.
        if 'file_url' in post:
            image_url = post['file_url']
            try:
                img_data = requests.get(image_url, timeout=request_timeout)
                img_data.raise_for_status()
                # Pillow detects the format from the bytes; that decides the extension.
                with Image.open(BytesIO(img_data.content)) as image:
                    ext = image.format.lower()
                    filename = os.path.join(save_dir, f"miku_{(page - 1) * limit + i + 1}.{ext}")
                    image.save(filename)
                print(f"Saved: {filename}")
            except Exception as e:
                # Best effort: one bad post must not stop the whole crawl.
                print(f"Failed to save image {i}: {e}")

    #Sleep to avoid restrictions
    time.sleep(1)

print("Finished downloading true images.")

In [None]:
def shuffle_and_rename(directory, seed=42):
    """Shuffle the image files in ``directory`` and rename them 0001, 0002, ...

    Only regular files whose suffix is .jpg, .jpeg, .png, .webp or .gif
    (case-insensitive) are touched; the suffix is lower-cased in the new name.

    The rename happens in two passes (current name -> unique staging name ->
    final name) so a file that already carries one of the target names is
    never overwritten. This makes the cell safe to run more than once.

    Args:
        directory: Folder containing the images (str or Path).
        seed: Seed passed to ``random.seed`` before shuffling.

    Returns:
        The number of files renamed; 0 if ``directory`` is not a directory.
    """
    import uuid  # local import so this cell stays self-contained

    directory = Path(directory)
    if not directory.is_dir():
        return 0

    image_suffixes = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
    # Sort first: glob order depends on the filesystem, so a fixed seed alone
    # would not give a reproducible order.
    image_files = sorted(
        f for f in directory.glob("*")
        if f.is_file() and f.suffix.lower() in image_suffixes
    )

    random.seed(seed)
    random.shuffle(image_files)

    # Pass 1: move everything to names that cannot clash with the final ones.
    token = uuid.uuid4().hex
    staged = []
    for i, image_path in enumerate(image_files):
        staging_path = directory / f"{token}_{i}{image_path.suffix.lower()}"
        image_path.rename(staging_path)
        staged.append(staging_path)

    # Pass 2: assign the final sequential names.
    for i, staging_path in enumerate(staged):
        staging_path.rename(directory / f"{i + 1:04d}{staging_path.suffix}")

    return len(staged)


save_dir = Path("/miku_classifier/data/true")
shuffle_and_rename(save_dir)

print("Renaming and shuffling complete.")

In [None]:
#Scraping the false data
save_dir = Path("/miku_classifier/data/false")
save_dir.parent.mkdir(parents=True, exist_ok=True)
save_dir.mkdir(exist_ok=True)

tag = "-hatsune_miku -vocaloid"
limit = 100
total_images = 1000
total_pages = total_images // limit

base_url = "https://danbooru.donmai.us/posts.json"

for page in range(1, total_pages + 1):
    params = {
        'tags': tag,
        'limit': limit,
        'page': page
    }

    response = requests.get(base_url, params=params)
    posts = response.json()

    if not posts:
        print("No more images found.")
        break

    for i, post in enumerate(posts):
        if 'file_url' in post:
            image_url = post['file_url']
            try:
                img_data = requests.get(image_url)
                image = Image.open(BytesIO(img_data.content))
                ext = image.format.lower()
                filename = os.path.join(save_dir, f"false_{(page - 1) * limit + i + 1}.{ext}")
                image.save(filename)
                print(f"Saved: {filename}")
            except Exception as e:
                print(f"Failed to save image {i}: {e}")

    time.sleep(1)

print("Finished downloading false images.")

In [None]:
def shuffle_and_rename(directory, seed=42):
    """Shuffle the image files in ``directory`` and rename them 0001, 0002, ...

    Only regular files whose suffix is .jpg, .jpeg, .png, .webp or .gif
    (case-insensitive) are touched; the suffix is lower-cased in the new name.

    The rename happens in two passes (current name -> unique staging name ->
    final name) so a file that already carries one of the target names is
    never overwritten. This makes the cell safe to run more than once.

    Args:
        directory: Folder containing the images (str or Path).
        seed: Seed passed to ``random.seed`` before shuffling.

    Returns:
        The number of files renamed; 0 if ``directory`` is not a directory.
    """
    import uuid  # local import so this cell stays self-contained

    directory = Path(directory)
    if not directory.is_dir():
        return 0

    image_suffixes = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
    # Sort first: glob order depends on the filesystem, so a fixed seed alone
    # would not give a reproducible order.
    image_files = sorted(
        f for f in directory.glob("*")
        if f.is_file() and f.suffix.lower() in image_suffixes
    )

    random.seed(seed)
    random.shuffle(image_files)

    # Pass 1: move everything to names that cannot clash with the final ones.
    token = uuid.uuid4().hex
    staged = []
    for i, image_path in enumerate(image_files):
        staging_path = directory / f"{token}_{i}{image_path.suffix.lower()}"
        image_path.rename(staging_path)
        staged.append(staging_path)

    # Pass 2: assign the final sequential names.
    for i, staging_path in enumerate(staged):
        staging_path.rename(directory / f"{i + 1:04d}{staging_path.suffix}")

    return len(staged)


save_dir = Path("/miku_classifier/data/false")
shuffle_and_rename(save_dir)

print("Renaming and shuffling complete.")

In [None]:
### Step 2: Train the model

In [None]:
!pip install tensorflow numpy matplotlib opencv-python

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split


def load_and_preprocess_images(image_dir, target_size=(128, 128)):
    """Load the labelled images below ``image_dir`` into arrays.

    ``image_dir`` must contain a ``true`` subdirectory (label 1) and a
    ``false`` subdirectory (label 0). Each file is loaded with ``load_img``
    at ``target_size``, converted with ``img_to_array`` and divided by 255.

    Entries that are not regular files are ignored, and files that cannot be
    opened as images are reported and skipped so that one stray file does not
    abort the whole load.

    Args:
        image_dir: Directory holding the ``true`` and ``false`` folders.
        target_size: Size passed to ``load_img`` for every image.

    Returns:
        ``(images, labels)`` as NumPy arrays, with one label per loaded image.

    Raises:
        OSError: If a class directory is missing or cannot be listed.
    """
    images = []
    labels = []

    for class_name, label in (('true', 1), ('false', 0)):
        class_dir = os.path.join(image_dir, class_name)
        # Sorted so the sample order (and thus a seeded split) is reproducible.
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            if not os.path.isfile(img_path):
                continue
            try:
                img = load_img(img_path, target_size=target_size)
            except OSError as e:
                print(f"Skipping unreadable image {img_path}: {e}")
                continue
            images.append(img_to_array(img) / 255.0)
            labels.append(label)

    return np.array(images), np.array(labels)

# Load every labelled image from the dataset root.
image_dir = '/miku_classifier/data/'
images, labels = load_and_preprocess_images(image_dir)

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Small CNN for the binary Miku / not-Miku decision: three Conv2D + MaxPooling
# stages, then a dense layer and a single sigmoid output unit.
model = tf.keras.Sequential([
    # tf.keras.Input replaces InputLayer(input_shape=...), whose `input_shape`
    # argument is deprecated in Keras 3.
    tf.keras.Input(shape=(128, 128, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Binary cross-entropy pairs with the single sigmoid output.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
# Fit for 10 epochs in batches of 32, passing the held-out split as validation data.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

In [None]:
### Validate the model (the validation accuracy is about 80%)

In [None]:
# Score the trained model on the validation split; with one compiled metric,
# evaluate() returns the loss followed by the accuracy.
evaluation = model.evaluate(X_val, y_val, verbose=2)
test_loss, test_acc = evaluation
print(f'Validation accuracy: {test_acc}')

In [None]:
### Test on an arbitrary image (you can specify any image by its URL)

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import requests
from PIL import Image
from io import BytesIO
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array

In [None]:
# Text field where the user pastes the address of the image to classify.
url_input_layout = widgets.Layout(width='600px')
url_input = widgets.Text(
    value='',
    placeholder='Paste URL here',
    description='Image URL:',
    layout=url_input_layout,
)

# Button that starts the classification of the pasted URL.
predict_button = widgets.Button(
    description='predict',
    button_style='success',
    tooltip='predict for designated image',
)

# Area that receives the displayed image and the printed verdict.
output = widgets.Output()

In [None]:
def predict_image_from_url(url, model, target_size=(128, 128), timeout=10):
    """Download an image and classify it with ``model``.

    The image is fetched from ``url``, converted to RGB, displayed, resized,
    scaled by 1/255 and passed to ``model.predict`` as a batch of one.

    Args:
        url: Address of the image to classify.
        model: Object exposing ``predict``; its first output value is
            compared against 0.5.
        target_size: ``(height, width)`` fed to the model, using the same
            convention as Keras ``load_img``.
        timeout: Seconds to wait for the download before giving up.

    Returns:
        ``True`` if the prediction is above 0.5, ``False`` otherwise, or
        ``None`` if any step failed (the error is printed).
    """
    try:
        # Without a timeout a stalled server would freeze the widget callback.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert("RGB")
        display(img)
        # PIL's resize expects (width, height), so reverse target_size.
        img = img.resize(tuple(target_size)[::-1])
        img = img_to_array(img) / 255.0
        img = np.expand_dims(img, axis=0)
        prediction = model.predict(img)
        # Return a plain Python bool rather than a NumPy scalar.
        return bool(prediction[0][0] > 0.5)
    except Exception as e:
        # Deliberately broad: any download/decoding/model error becomes None.
        print(f"failed: {e}")
        return None

In [None]:
def on_predict_clicked(b):
    """Button callback: classify the image at the entered URL and print the verdict.

    Everything is rendered inside the ``output`` widget, which is cleared
    first so that only the latest result is shown. ``b`` (the clicked button)
    is not used.
    """
    with output:
        clear_output()
        verdict = predict_image_from_url(url_input.value, model)
        if verdict is None:
            message = "failed to classify"
        elif verdict:
            message = "Miku in a picture!"
        else:
            message = "Miku isn't in a picture."
        print(message)

In [None]:
# Attach the click handler before the button is rendered.
predict_button.on_click(on_predict_clicked)

# Render the URL field, the button and the result area, in that order.
ui_elements = (url_input, predict_button, output)
display(*ui_elements)

In [None]:
'''
URLs that look useful for testing
(T)
https://piapro.net/images/ch_img_miku.png
https://magicalmirai.com/2024/images/special/gallery/visual/visual_tama_main.jpg
https://special.goodsmile.info/mikudayo10th/images/img_product_a.png

(F)
https://www.ssw.co.jp/products/vocaloid6/megpoid/images/v6Gumi_nbg.png
https://kasaneteto.jp/sv/img/sv-teto.png
https://life.ja-group.jp/upload/food/vegetable/main/8_1.jpg

'''

In [None]:
from google.colab import drive

# Mount Google Drive so the scraped dataset can be stored there.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Create the dataset folder on Drive (-p: no error if it already exists).
!mkdir -p /content/drive/MyDrive/miku_project/data

In [None]:
# Move (not copy) the positive images to Drive; afterwards
# /miku_classifier/data/true no longer exists locally.
!mv /miku_classifier/data/true /content/drive/MyDrive/miku_project/data

In [None]:
# Move (not copy) the negative images to Drive; afterwards
# /miku_classifier/data/false no longer exists locally.
!mv /miku_classifier/data/false /content/drive/MyDrive/miku_project/data