## Is it a bee?

In [1]:
import socket,warnings
# Fail fast with a readable message when the notebook has no internet access.
try:
    # One-second default timeout so the probe cannot hang the notebook.
    socket.setdefaulttimeout(1)
    # `with` closes the probe socket whether or not the connection succeeds;
    # the original left the socket object open for the garbage collector.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.connect(('1.1.1.1', 53))
except socket.error as ex: raise Exception("STOP: No internet. Click '>|' in top right and set 'Internet' switch to on")

In [2]:
import os
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

if iskaggle:
    !pip install -Uqq fastai duckduckgo_search

The basic steps are:

1. Use DuckDuckGo to search for images of "bee photos"
1. Use DuckDuckGo to search for images of "wasp photos"
1. Fine-tune a pretrained neural network to recognise these two groups
1. Try running this model on a picture of a bee and a picture of a wasp and see if it works.

## Step 1: Download images of bees and wasps

In [3]:
from duckduckgo_search import ddg_images
from fastcore.all import *

def search_images(term, max_images=30):
    """Search DuckDuckGo images for `term` and return the result image URLs.

    Prints the search term, asks `ddg_images` for at most `max_images`
    results, and returns the 'image' entry of each result as a fastcore `L`.
    """
    print(f"Searching for '{term}'")
    results = ddg_images(term, max_results=max_images)
    return L(results).itemgot('image')

In [14]:
#NB: `search_images` depends on duckduckgo.com, which doesn't always return correct responses.
# Fetch a single result as a quick sanity check; `urls` is read again by the download step that follows.

urls = search_images('bee photos', max_images=1)
# Last expression of the cell, so the first URL is displayed as the cell output.
urls[0]

In [15]:
# Save the first search hit to a local file; 'bee.jpg' is read again in the prediction step.
from fastdownload import download_url
dest = 'bee.jpg'
download_url(urls[0], dest, show_progress=False)

# NOTE(review): `Image` below is presumably supplied by this star import -- confirm.
from fastai.vision.all import *
im = Image.open(dest)
# Last expression of the cell: the thumbnail produced by to_thumb(256,256) is displayed.
im.to_thumb(256,256)

In [16]:
# Same sanity check as for the bee: fetch one wasp result, save it, and show a thumbnail.
wasp_file = 'wasp.jpg'
wasp_urls = search_images('wasp photos', max_images=1)
download_url(wasp_urls[0], wasp_file, show_progress=False)
Image.open(wasp_file).to_thumb(256,256)

In [17]:
# One sub-folder of images per search term; the folder name later serves as the label.
searches = 'wasp','bee'
path = Path('bee_or_not')
from time import sleep

for o in searches:
    dest = path/o
    dest.mkdir(exist_ok=True, parents=True)
    # Three query variants per term, run in this order.
    queries = (f'{o} photo', f'{o} sun photo', f'{o} shade photo')
    for query in queries:
        download_images(dest, urls=search_images(query))
        sleep(10)  # Pause between searches to avoid over-loading server
    # Shrink the downloaded files in place (max_size=400) once all three searches are done.
    resize_images(dest, max_size=400, dest=dest)

## Step 2: Train the model

In [18]:
# Find the downloaded files that fail image verification.
image_files = get_image_files(path)
failed = verify_images(image_files)
# Delete each of them from disk so they never reach the data loaders.
for broken in failed: broken.unlink()
# Cell output: how many files were removed.
len(failed)

In [19]:
# Describe how files under `path` become (image, label) pairs.
bee_wasp_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),               # inputs are images, targets are categories
    get_items=get_image_files,                        # collect the image files under the source path
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # training/validation split: 20% held out, fixed seed
    get_y=parent_label,                               # label = parent folder name ("wasp" or "bee")
    item_tfms=[Resize(192, method='squish')]          # resize every item to 192 with 'squish' so sizes match
)

# Build the data loaders from the image folder, 32 items per batch.
dls = bee_wasp_block.dataloaders(path, bs=32)

dls.show_batch(max_n=6)

In [20]:
# Build a learner from the data loaders with the resnet18 architecture, reporting error_rate as the metric.
learn = vision_learner(dls, resnet18, metrics=error_rate)
# NOTE(review): 3 is presumably the number of fine-tuning epochs -- confirm against the fastai `fine_tune` docs.
learn.fine_tune(3)

## Step 3: Use the model

In [21]:
# Run the trained model on the bee sample image saved earlier.
# `is_bee` holds the predicted label (it is printed as such below), `probs` the per-class probabilities.
is_bee,_,probs = learn.predict(PILImage.create('bee.jpg'))
# Look up where the 'bee' class sits in the vocab instead of hard-coding index 0,
# so the printed probability stays correct if the label set or its ordering changes.
bee_idx = learn.dls.vocab.o2i['bee']
print(f"This is a: {is_bee}.")
print(f"Probability it's a bee: {probs[bee_idx]:.4f}")

In [25]:
# Run the trained model on the wasp sample image saved earlier.
# `is_bee` holds the predicted label (it is printed as such below), `probs` the per-class probabilities.
is_bee,_,probs = learn.predict(PILImage.create('wasp.jpg'))
# Look up where the 'bee' class sits in the vocab instead of hard-coding index 0,
# so the printed probability stays correct if the label set or its ordering changes.
bee_idx = learn.dls.vocab.o2i['bee']
print(f"This is a: {is_bee}.")
print(f"Probability it's a bee: {probs[bee_idx]:.4f}")