In [None]:
%load_ext lab_black
# Enforces good coding style. Running it is optional.

In [None]:
# Import the library that helps authenticate Google services.
import ohawf

In [None]:
# OAuth scopes to request for Google Photos (these are not ohawf defaults):
# one read-only scope and one append-only scope, built from a shared prefix.
_PHOTOS_SCOPE_PREFIX = "https://www.googleapis.com/auth/photoslibrary"
scopes = [f"{_PHOTOS_SCOPE_PREFIX}.{access}" for access in ("readonly", "appendonly")]

In [None]:
# Causes web login prompt for the Google login you want to use.
# The requested `scopes` are passed through, and the returned credentials are
# kept in `creds` so they can be handed to the API client when it is built.
# NOTE(review): the login flow itself lives inside ohawf and is not visible
# here; confirm its behaviour against that library.
creds = ohawf.get(scopes=scopes)

In [None]:
# This is in common for connecting to most Google services.
from apiclient.discovery import build

In [None]:
# Build an authenticated service connection to Google Photos.
# "photoslibrary" and "v1" name the API and its version; `creds` is the
# credential object obtained from ohawf.
# NOTE(review): static_discovery=False presumably makes the client fetch the
# discovery document at runtime instead of using a bundled copy. Confirm
# against the google-api-python-client documentation.
service = build("photoslibrary", "v1", credentials=creds, static_discovery=False)

# Congratulations!

If your code ran without errors down to here, you're connected to Google Photos. We have to choose between the [list](https://developers.google.com/photos/library/reference/rest/v1/mediaItems/list) and the [search](https://developers.google.com/photos/library/reference/rest/v1/mediaItems/search) APIs. Only the search API gives us enough parameters to effectively ***chunk*** the results, so that the same request always returns the same set of pictures from the same time period. A month's worth of photos is too much and a day's worth is too little. A week's worth is just right. But how do we number our weeks?

## Week Zero: Pick UTC or Your Birthday

Unix time (a.k.a. the Unix timestamp) is the number of seconds that have elapsed since January 1st, 1970 at 00:00:00 Coordinated Universal Time (UTC). Make the week containing that date week-zero, or use the week of your birthday. Either way, you'll have a convenient week numbering system that will nicely chunk Google Photos into manageable sets of photos.

In [None]:
# Import some datetime utilities
from datetime import date, datetime, timedelta

In [None]:
# Week numbering needs an anchor date: the Unix epoch or your own birthday.
birthday = date(year=1970, month=8, day=27)  # Replace with your birthday if you like.
start_utc = date(year=1970, month=1, day=1)  # The Unix epoch date.
start_date = birthday  # The anchor actually used for week-zero.

In [None]:
# Display what day of week you chose, and week-zero start date.
# Week-zero is the Monday-to-Sunday week that contains `start_date`.
# date.weekday() counts Monday as 0, so its value is also the number of days
# to step back to reach that week's Monday. Subtracting `6 - weekday` instead
# only lands on a Monday when the chosen date is a Thursday (which both sample
# dates are); for any other date it would begin the week on a different day.
weekday = start_date.weekday()
days_since_monday = weekday
start_week = start_date - timedelta(days=days_since_monday)
if start_date == start_utc:
    msg = "January 1, 1970 was a"
else:
    msg = "You were born on a"
print(f"{msg} {start_date:%A}.")
print(f"Week-zero starts {start_week}.")

In [None]:
# Show how many weeks there have been since start date.
# Ceiling division gives the number of 7-day steps needed to reach or pass
# today. It is clamped at zero in case week-zero has not begun yet.
days_elapsed = (datetime.today().date() - start_week).days
week_number = max(0, -(-days_elapsed // 7))
current_week = start_week + timedelta(weeks=week_number)
weeks_to_date = week_number
print(f"We are on week number {weeks_to_date} since {start_week}.")

In [None]:
# Import namedtuple, which lets you create labeled bundles of API-call arguments.
from collections import namedtuple

# A Week bundles a week's number with its first and last day.
Week = namedtuple(typename="Week", field_names=("number", "start", "end"))
print(f"You have created a custom data class: {Week}")

In [None]:
# Create a table with the API parameters and arguments for every week.
# One row per week that began before today: the week number, its first day and
# its last day (six days later), all stored as strings.
days_elapsed = (datetime.today().date() - start_week).days
week_number = max(0, -(-days_elapsed // 7))  # Ceiling division, never negative.
table = [
    Week(
        f"{n}",
        f"{start_week + timedelta(weeks=n)}",
        f"{start_week + timedelta(weeks=n, days=6)}",
    )
    for n in range(week_number)
]
current_week = start_week + timedelta(weeks=week_number)
print("First 10 rows of table:")
table[:10]

# Enumerating All Your Photos

Getting a good handle on all your media in Google Photos could be a very large or a very small task depending on how many media items you have, and for how far back. You have a limited amount of API usage per day, called quota, and we want to use it wisely. If you have a lot of photos, this will not be a 1-day thing. Expect to run this script over the course of days or weeks, giving your tender love and care to your photos a week at a time.

## The Plan
  
We will eventually make individual thumbnail images on your local system, one for every media item you have in Google Photos, but we will start out by focusing on a single week. Once we have one week under control, we will process the rest, spreading the job out over multiple days as quota allows. Every thumbnail image is stuffed with all the metadata from the original source image, which is a good way to preserve information such as the original filename and original creation date.

In [None]:
# Import tool to choose a random integer in a range.
from random import randint

# Cutoff: the oldest date for which you might want to process media.
no_older_than = date(year=2010, month=11, day=7)

In [None]:
# Calculate the earliest week number for the randint range.
# Walk the weeks oldest-first and stop at the first one that begins after the
# cutoff date. If none does, the loop runs off the end and the last week in the
# table is used.
for week in table:
    week_begins = datetime.strptime(week.start, "%Y-%m-%d").date()
    if no_older_than < week_begins:
        break
earliest_number = int(week.number)
print(f"Earliest week is number {earliest_number}.")

In [None]:
# Pick the random week to process.
from pathlib import Path

# Week numbers that already have an entry under photos/source. Entries whose
# names are not plain digits (stray files, for example) are ignored instead of
# crashing int().
processed_numbers = {
    int(entry.name)
    for entry in Path("photos/source").glob("*")
    if entry.name.isdecimal()
}

# Rows are appended in order, so a row's index equals its week number. Slicing
# from `earliest_number` keeps the pick inside the table. randint() includes
# its upper bound, so indexing the table with a value up to `weeks_to_date`
# could reach one past the last row.
candidates = [
    week
    for week in table[earliest_number:]
    if int(week.number) not in processed_numbers
]
if not candidates:
    # Fail with a clear message rather than looping forever or raising an
    # unexplained IndexError.
    raise RuntimeError("Every week since the cutoff already has a source folder.")

random_week = candidates[randint(0, len(candidates) - 1)]
print(f"The week I chose is {random_week}")

In [None]:
# Get the query ready that will load the first result-set of data:

# Week boundaries are stored as "YYYY-MM-DD" strings; split them into parts.
sY, sM, sD = random_week.start.split("-")
eY, eM, eD = random_week.end.split("-")

npt = ""  # No page token yet: this is the first request.
args = {
    "pageSize": 100,
    "pageToken": npt,
    "filters": {
        "includeArchivedMedia": True,
        "dateFilter": {
            "ranges": [
                {
                    # The API's Date fields are integers, so convert the
                    # zero-padded string parts ("07") before sending them.
                    "startDate": {"year": int(sY), "month": int(sM), "day": int(sD)},
                    "endDate": {"year": int(eY), "month": int(eM), "day": int(eD)},
                }
            ]
        },
    },
}

In [None]:
# Send the search described by `args` and keep the first page in `results`.

search_request = service.mediaItems().search(body=args)
results = search_request.execute()

for message in ("Now you're holding these results only in memory.", "Done!"):
    print(message)

In [None]:
# Get the tools that let us write stuff to drive.
from pathlib import Path
from sqlitedict import SqliteDict as sqldict

In [None]:
# Make sure the database folder exists and name this week's database file.
data = "photos/db"
db_folder = Path(data)
db_folder.mkdir(parents=True, exist_ok=True)
weekdb = f"{data}/{random_week.number}.db"
# Token for the page after the one held in memory (None if the key is absent).
npt = results.get("nextPageToken")

In [None]:
# Write the results we currently only have in memory to drive.
# The page is filed under the token of the page that follows it. When the
# response carries no next-page token, `npt` is None; a None key would be
# stored as SQL NULL, which a later db[key] lookup cannot match. The empty
# string is used instead. It is still falsy, so code that tests `npt` to decide
# whether more pages remain behaves the same.
with sqldict(weekdb) as db:
    db[npt or ""] = results
    db.commit()

In [None]:
# Show that you can step through the data you have on drive.
# You will not see anything happen, but the fact it can run
# means it can reproduce the data it just stored.
#                                                  But the clock is ticking!

# Dedicated loop names are used on purpose: reusing `npt` and `results` here
# would overwrite the notebook-level values that the paging cell continues
# from, which goes wrong as soon as the database holds pages from an earlier
# run. items() also reads each key together with its value.
with sqldict(weekdb) as db:
    for stored_token, stored_page in db.items():
        pass  # Reading each page back is the whole test.
print("Done")

In [None]:
# Show yourself the "top-level" keys of the response: typically mediaItems,
# plus nextPageToken when another page follows.
for top_level_key in results.keys():
    print(top_level_key)

In [None]:
# Fetch the remainder of the data for that week.
# This steps through all responses populated with a pageToken.
# It's important to note you don't actually have any pictures saved locally yet.
c = 0  # Number of additional pages fetched so far.
while npt:
    args = {
        "pageSize": 100,
        "pageToken": npt,
        "filters": {
            "includeArchivedMedia": True,
            "dateFilter": {
                "ranges": [
                    {
                        # The API's Date fields are integers, so convert the
                        # zero-padded string parts before sending them.
                        "startDate": {
                            "year": int(sY),
                            "month": int(sM),
                            "day": int(sD),
                        },
                        "endDate": {
                            "year": int(eY),
                            "month": int(eM),
                            "day": int(eD),
                        },
                    }
                ]
            },
        },
    }
    results = service.mediaItems().search(body=args).execute()
    npt = results.get("nextPageToken")
    # Pages are filed under the token of the page that follows them. The last
    # page has no such token, so it is filed under "" rather than skipped;
    # skipping it would silently lose that page's media items. A token-less
    # page that carries no media items is not stored.
    if npt or results.get("mediaItems"):
        with sqldict(weekdb) as db:
            db[npt or ""] = results
            db.commit()
    c += 1
    print(c, end=" ")
print("Done")

In [None]:
# Load the libraries that let you save response objects as local files.
from httpx import get
from PIL import Image
from io import BytesIO
from imagehash import whash

# Folder for this week's downloads, plus an "archive" sub-folder inside it.
source = f"photos/source/{random_week.number}"
Path(f"{source}/archive").mkdir(exist_ok=True, parents=True)

# Walk every stored page and download each image that is not on drive yet.
with sqldict(weekdb) as db:
    for page_count, (_, page) in enumerate(db.items(), start=1):
        print(page_count, end=" ")
        # A stored page may lack the "mediaItems" key; treat that as empty.
        for item in page.get("mediaItems", []):
            if item["mimeType"].split("/")[0] != "image":
                continue  # Only images are downloaded here.
            # Prefix the media id so two items with the same filename cannot
            # overwrite each other.
            filename = f"{item['id']}____{item['filename']}"
            full_path = Path(source) / filename
            if full_path.exists():
                continue  # Already downloaded on an earlier run.
            # "=d" is appended to the base URL to request the download.
            response = get(f"{item['baseUrl']}=d", timeout=30)
            if response.status_code != 200:
                # Do not save an error body as if it were the image: the
                # exists() check above would then never retry it.
                print(f"\nSkipped {filename}: HTTP {response.status_code}")
                continue
            full_path.write_bytes(response.content)
print("Done")

In [None]:
# Same week as before, but this time leaving archived media out of the search.
npt = ""  # No page token yet: this is the first request.
args = {
    "pageSize": 100,
    "pageToken": npt,
    "filters": {
        "includeArchivedMedia": False,
        "dateFilter": {
            "ranges": [
                {
                    # The API's Date fields are integers, so convert the
                    # zero-padded string parts ("07") before sending them.
                    "startDate": {"year": int(sY), "month": int(sM), "day": int(sD)},
                    "endDate": {"year": int(eY), "month": int(eM), "day": int(eD)},
                }
            ]
        },
    },
}

In [None]:
# Execute the search described by `args` and keep the response in `results`
# (this replaces whatever `results` held before).
results = service.mediaItems().search(body=args).execute()

In [None]:
# Folder that holds the per-week databases.
data = "photos/db"
# Path of a second database for the same week, marked with a "_b" suffix.
weekdb2 = f"{data}/{random_week.number}_b.db"

In [None]:
# Save the page held in `results` to the second database under the key `npt`.
# When the cells are run in order, `npt` is still the empty string assigned
# while the query was built, so this page is stored under "".
with sqldict(weekdb2) as db:
    db[npt] = results
    db.commit()

In [None]:
# Fetch the remaining pages of the search and add them to the second database.
c = 0  # Number of additional pages fetched so far.
npt = results.get("nextPageToken")
while npt:
    # Remember which token requested this page; it becomes the storage key.
    request_token = npt
    args = {
        "pageSize": 100,
        "pageToken": request_token,
        "filters": {
            "includeArchivedMedia": False,
            "dateFilter": {
                "ranges": [
                    {
                        # The API's Date fields are integers, so convert the
                        # zero-padded string parts before sending them.
                        "startDate": {
                            "year": int(sY),
                            "month": int(sM),
                            "day": int(sD),
                        },
                        "endDate": {
                            "year": int(eY),
                            "month": int(eM),
                            "day": int(eD),
                        },
                    }
                ]
            },
        },
    }
    results = service.mediaItems().search(body=args).execute()
    npt = results.get("nextPageToken")
    # In this database the first page is stored under the (empty) token it was
    # requested with, so later pages are keyed the same way. That gives every
    # page, including the last one with no next token, its own key. Writing
    # only when a next token exists would silently drop the last page. A
    # token-less page that carries no media items is not stored.
    if npt or results.get("mediaItems"):
        with sqldict(weekdb2) as db:
            db[request_token] = results
            db.commit()
    c += 1
    print(c, end=" ")
print("Done")