In [3]:
from requests import get, Timeout
from bs4 import BeautifulSoup as bs
import pandas as pd
from time import sleep
import random
from datetime import datetime as dt
import json

код для проверки ниже
---
класс ParserQuotes оставил, так как наследовался от него


In [4]:
class ParserQuotes:
    
    url = "https://quotes.toscrape.com/page/"
    headers = {
        'accept': '*/*',
        'user-agent': 'Mozilla / 5.0(Macintosh; Intel Mac OS X 10_14_6)'
                    ' AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 98.0 .4758 .102 Safari / 537.36'
    }

    def __init__(self, start_parsing = True):
        
        if start_parsing:
            print("START SCRAPE")
            print("*" * 12 + "\n")
            self.content = self.start_parsing(self.url, self.headers)


    def start_parsing(self, url=url, headers=headers, start_page=1):
        page = start_page
        content = []

        while True:
            page_url = url + str(page)

            print()
            print(f"Start {page} page parsing")
            print("url --> " + page_url)

            sleep(round(random.uniform(0, 3), 3))
            soup = self.get_page_soup(page_url, headers)
            if soup == None:
                return content

            tmp = self.page_scrape(soup)
            content.extend(tmp)
            try:
                page += 1
                soup.find('a', href=f"/page/{page}/").text
            except AttributeError:
                print()
                print("end content")
                print("***********")
                print("END PARSING")
                return content


    def get_page_soup(self, page_url, headers):
        
        try:
            response = get(page_url, headers=headers, timeout=10)
            print("Status response: " + str(response.status_code))
            if response.status_code == 200:
                return bs(response.content, "html.parser")
            else:
                print("Error: status_code")
                return None
        except Timeout:
            print("error: Превышено время ожидания ответа")
            return None


    def page_scrape(self, soup):
        
        print("Start scrape")

        res = []
        quotes = soup.find_all('div', class_="quote")

        for quote in quotes:
            quote_dict = {}

            quote_dict["quote"] = quote.find('span', class_="text").text
            quote_dict["author"] = quote.find('small', class_='author').text

            tags = quote.find('meta', class_='keywords')['content'].split(',') # <<<---( 'meta', class_='keywords' )
            quote_dict["tag"] = tags

            res.append(quote_dict)


        
        print("Scrape: OK")
        return res


    def prepare_data_content(self):
        self.name_f = "content_" + dt.now().strftime('%H:%M_%d.%m.%Y') + '.json' # <<<---( .json )
        with open(self.name_f, 'w') as f:
            json.dump(self.content, f)
                    
        print()
        print("prepare_data: OK")
        print("Name: " + self.name_f)


# quotes = ParserQuotes()


In [4]:
print(f"Количество контента = {len(quotes.content)}")

print("Вот случайная цитата:\n")
tmp = quotes.content[random.randint(0, len(quotes.content) - 1)]
print(tmp['quote'])
print(tmp['author'])
print(tmp['tag'])

Количество контента = 100
Вот случайная цитата:

“There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”
Jane Austen
['friendship', 'love']


### Home work 2

In [44]:
import sqlite3

class DbQuotes(ParserQuotes):

    def __init__(self, start_parser=False, file=None):
        self.content = None

        if start_parser:
            super.__init__()
        elif not start_parser and file != None:
            self.loding_json(file) # <<<-----------( что бы не гонять парсер, если есть .json)
            # имя файла можно указать при инцилизации класса или явно вызвав метод .loding_json('file name')
            
    
    def create_db(self):
        if self.content == None: # <<<----------( проверка есть ли данные )
            print("Error: нет данных в атрибуте 'self.content'\n\n"\
                  "Используйте '.start_parsing()' или '.loding_json()'")
        else:
            quotes_tuple = self.prepare_data()
            name_db = 'sqlite.db'
            
            try:
                with sqlite3.connect(name_db) as connect:
                    cursor = connect.cursor()

                    # Создание
                    create_table = """
                    CREATE TABLE IF NOT EXISTS quote(
                        quote_id INTEGER PRIMARY KEY AUTOINCREMENT,
                        quote_text TEXT,
                        author TEXT,
                        tags TEXT
                    )
                    """
                    cursor.execute(create_table)
                    connect.commit()

                    # Заполнение
                    insert_query = '''
                    INSERT INTO quote(
                            quote_text,
                            author,
                            tags
                    )
                    VALUES (?,?,?)
                    '''
                    cursor.executemany(insert_query, quotes_tuple)
                    connect.commit()

                    
            except Exception as ex:
                print(f"Error: при работе с SQL - {ex}")

            print("\n" + "*" * 47)
            print(f"База данных {name_db} создана, данные добавлены")
                
        return self
    
    
    def prepare_data(self):
        res = []

        for quote in self.content:
            quote['tag'] = ", ".join(quote['tag'])
            res.append(tuple(quote.values()))
        
        return res
        


    def loding_json(self, file=None):

        try:
            if file == None:
                file = self.name_f
        except AttributeError:
            print("Error: Укажите имя файла 'name.json' или выполните .start_parser()")
            return

        try:
            print(f"Загрузка с файла {file}")
            with open(file, "r", encoding='utf-8') as f:
                self.content = json.load(f)
            
            print("Контент загружен")
            print("****************\n")
            print(f"Количество контента = {len(self.content)}")
            print("Вот случайная цитата:\n")
            tmp = self.content[random.randint(0, len(self.content) - 1)]
            print(tmp['quote'])
            print(tmp['author'])
            print(tmp['tag'])
        except Exception as ex:
            print(f"Error: Ошибка при загрузке файла: {ex}")
        


# file="content_11:26_12.06.2022.json"
quotes = DbQuotes(file="content_11:26_12.06.2022.json").create_db()
# quotes.create_db()

Загрузка с файла content_11:26_12.06.2022.json
Контент загружен
****************

Количество контента = 100
Вот случайная цитата:

“It matters not what someone is born, but what they grow to be.”
J.K. Rowling
['dumbledore']

***********************************************
База данных sqlite.db создана, данные добавлены


In [35]:
quotes.content[0]

{'quote': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”',
 'author': 'Albert Einstein',
 'tag': 'change, deep-thoughts, thinking, world'}