# 下载PixivCrawler抓取到的图片

In [None]:
import json
import os
import random
import sqlite3 as lite
import time

import pandas as pd

from pixivpy3 import *

try:
    from tqdm.notebook import tqdm  # new tqdm
except:
    from tqdm import tqdm_notebook as tqdm


class PixivDownloader:
    """Download the images of illusts stored in a PixivCrawler SQLite database.

    Illust rows are read from ``illust_db`` with :meth:`DBIllusts`, an image
    URL is picked for each row with :meth:`getImageUrl`, and
    :meth:`StartDownload` fetches the files through ``AppPixivAPI.download``.
    """

    # Columns stored as JSON text in the database; DBIllusts decodes them.
    JSON_COLUMNS = (
        "image_urls",
        "meta_pages",
        "meta_single_page",
        "series",
        "tags",
        "tools",
        "user",
    )

    # How many times a single image download is attempted before giving up.
    MAX_ATTEMPTS = 3

    def __init__(self, illust_db="pixiv_illusts.db"):
        """Remember the path of the SQLite database holding the illusts."""
        self.illust_db = illust_db

    @staticmethod
    def _lookup(container, key):
        """Return ``container[key]``, else ``container.key``, else ``None``.

        Rows decoded by DBIllusts hold plain dicts (no attribute access),
        while other callers may pass attribute-style objects; both work here.
        """
        try:
            return container[key]
        except (KeyError, IndexError, TypeError):
            return getattr(container, key, None)

    def DBIllusts(self, sql="SELECT * FROM illusts WHERE illust_id > 0"):
        """Run ``sql`` against the illust database and return a DataFrame.

        The frame is indexed by ``illust_id``. Every column listed in
        ``JSON_COLUMNS`` that is present in the result is decoded from its
        JSON text back into Python objects, so a custom ``sql`` may select
        only a subset of the columns.
        """
        # sqlite3's connection context manager only commits/rolls back; it
        # does not close the connection, so close it explicitly.
        conn = lite.connect(self.illust_db)
        try:
            sql_df = pd.read_sql_query(sql, conn, index_col="illust_id")
        finally:
            conn.close()

        # Restore the JSON fields.
        for column in self.JSON_COLUMNS:
            if column in sql_df.columns:
                sql_df[column] = sql_df[column].apply(json.loads)
        return sql_df

    def randSleep(self, base=0.1, rand=0.5):
        """Sleep for a random time between ``base`` and ``base + rand`` seconds."""
        time.sleep(base + rand * random.random())

    def getImageUrl(self, illust, origin=True):
        """Pick the image URL to download for one illust row.

        With ``origin`` true, return ``meta_single_page.original_image_url``
        and fall back to ``image_urls.large`` when that value is missing or
        empty. Otherwise return ``image_urls.square_medium``.
        Returns ``None`` when the requested URL is not available.
        """
        image_urls = self._lookup(illust, "image_urls")
        if not origin:  # square
            return self._lookup(image_urls, "square_medium")

        single_page = self._lookup(illust, "meta_single_page")
        original_url = self._lookup(single_page, "original_image_url")
        return original_url or self._lookup(image_urls, "large")

    def StartDownload(self, path, origin=True):
        """Download one image per illust in the database into ``path``.

        ``path`` is created (including missing parents) when it does not
        exist. ``origin`` is forwarded to :meth:`getImageUrl`. Each download
        is attempted up to ``MAX_ATTEMPTS`` times; failures are printed and
        the loop moves on to the next illust.
        """
        os.makedirs(path, exist_ok=True)

        api = AppPixivAPI(timeout=3)
        df = self.DBIllusts()
        for illust_id, illust in tqdm(df.iterrows(), total=df.shape[0]):
            image_url = self.getImageUrl(illust, origin)
            if not image_url:
                print(f">> Skip illust {illust_id}: no image url")
                continue

            for _ in range(self.MAX_ATTEMPTS):
                try:
                    downloaded = api.download(image_url, path=path)
                except Exception as e:
                    # Best effort: report the failure and retry.
                    print(f">> Download {image_url} failed: {e}")
                else:
                    # Only pause after a truthy result (presumably "a file
                    # was actually fetched" -- confirm against pixivpy3).
                    if downloaded:
                        self.randSleep()
                    break


# Downloader instance shared by the cells below; reads pixiv_illusts.db.
dl = PixivDownloader(illust_db="pixiv_illusts.db")

In [None]:
# Download the original image of every illust into ./illusts
# (origin=True is the default and is spelled out here for clarity).
df = dl.StartDownload(path="./illusts", origin=True)

In [None]:
# Download the square-cropped image of every illust into the squares directory.
dl.StartDownload("squares", origin=False)