In [158]:
import logging

import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

In [159]:
class JuralParser:
    """Scrape product cards from a catalogue listing page into MongoDB.

    Typical use: build the parser, call ``connect_db()`` once, then call
    ``parse_data(url)`` for every listing page that should be imported.
    Records are de-duplicated by ``title`` through a unique index.
    """

    # Seconds to wait for the catalogue server before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, mongoUrl):
        """Store connection settings; no connection is opened here.

        Args:
            mongoUrl: MongoDB connection string handed to ``MongoClient``.
        """
        self.mongoUrl = mongoUrl
        self.databaseName = "JuralDetails"
        self.collectionName = "AllDetails"
        # Both are populated by connect_db().
        self.client = None
        self.collection = None

    def connect_db(self):
        """Connect to MongoDB and prepare the target collection.

        Creates a unique index on ``title`` so that re-importing a page
        cannot produce duplicate records.

        Returns:
            The server's reply to the ``ping`` admin command.
        """
        # All settings live on the instance; the bare names used previously
        # only resolved when they happened to exist as notebook globals.
        self.client = MongoClient(self.mongoUrl)
        db = self.client[self.databaseName]
        self.collection = db[self.collectionName]
        self.collection.create_index("title", unique=True)
        return self.client.admin.command('ping')

    @staticmethod
    def _child_text(container, tag, default, **attrs):
        """Return the stripped text of the first matching descendant, or ``default``."""
        if container is None:
            return default
        node = container.find(tag, **attrs)
        return node.text.strip() if node is not None else default

    @staticmethod
    def _parse_price(raw):
        """Convert a scraped price string to ``int``; unparsable values become 0."""
        # str.split() with no argument also drops non-breaking spaces.
        cleaned = "".join(str(raw).split())
        try:
            return int(cleaned)
        except ValueError:
            pass
        try:
            # Accept decimal notation such as "8600.00"; the fraction is dropped.
            return int(float(cleaned))
        except (ValueError, OverflowError):
            logging.warning("Could not parse price %r; storing 0", raw)
            return 0

    def _extract_product(self, item):
        """Build the document to store from one product-card element."""
        name_box = item.find('div', class_='product-card__name')
        info_box = item.find('div', class_='product-card__info')
        prices_box = item.find('div', class_='product-card__prices')

        # Look the <meta> up inside the prices block it is read from, so a
        # missing tag yields the default instead of a TypeError.
        price_meta = prices_box.find('meta', itemprop='price') if prices_box is not None else None
        raw_price = price_meta.get('content', "0") if price_meta is not None else "0"

        return {
            "title": self._child_text(name_box, 'a', "Нет названия"),
            "article": self._child_text(info_box, 'span', "Нет артикула", itemprop='model'),
            "manufacturer": self._child_text(info_box, 'span', "Нет производителя", itemprop='brand'),
            "price": self._parse_price(raw_price),
        }

    def parse_data(self, url):
        """Parse a listing page and insert the records not yet stored.

        Args:
            url: Address of the catalogue listing page.

        Returns:
            Number of records newly inserted into the collection.

        Raises:
            requests.RequestException: on network failure, timeout, or an
                HTTP error status.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # An error page contains no product cards and would otherwise be
        # indistinguishable from "nothing new on this page".
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        new_entries = 0
        for item in soup.find_all('div', class_='products-list__item'):
            product_data = self._extract_product(item)
            try:
                self.collection.insert_one(product_data)
            except DuplicateKeyError:
                # Only a unique-index violation means "already stored";
                # any other failure must surface to the caller.
                logging.info(f"{product_data['title']} уже существует")
            else:
                new_entries += 1

        return new_entries

    def get_details_with_greater_price(self, price: int = 0):
        """Find stored records whose price is strictly greater than ``price``.

        Args:
            price: Exclusive lower bound, compared with the ``$gt`` operator.

        Returns:
            The result of ``collection.find`` for that query.
        """
        query = {"price": {"$gt": price}}
        return self.collection.find(query)

    def print_data(self, data):
        """Print every element of ``data`` on its own line."""
        for d in data:
            print(d)

In [160]:
# Catalogue listing pages that are passed to JuralParser.parse_data below.
source_url_1 = "https://agroteh26.ru/catalog/20-zapchasti_dlya_gruzovoj_tehniki/3-katalog_zapchastej_ural/elektrooborudovanie-fqsdg/"
source_url_2 = "https://agroteh26.ru/catalog/19-zapchasti_dlya_selhoztehniki/dieci/"

In [161]:
# Build the parser for a MongoDB server on localhost; the constructor only
# stores settings, the connection itself is made by connect_db().
parser = JuralParser(mongoUrl="mongodb://localhost:27017/PSU")

In [162]:
# Connect and create the unique "title" index; the displayed value is the
# server's reply to the ping command.
parser.connect_db()

{'ok': 1.0}

In [164]:
# Import the first listing page; the displayed number is the count of newly
# inserted records (titles already stored are skipped).
parser.parse_data(source_url_1)

0

In [165]:
# Exclusive lower price bound for the queries below (the query uses $gt).
price_to_find = 8600

In [166]:
# Fetch and print every stored record priced above the threshold.
docs = parser.get_details_with_greater_price(price_to_find)
parser.print_data(data=docs)

{'_id': ObjectId('673cd95de0e8f8852f26afd3'), 'title': 'Главный тормозной цилиндр УРАЛ 4320-3510011 (з.д.)(без бачка)', 'article': '4320-3510011', 'manufacturer': 'УРАЛ', 'price': 10500}
{'_id': ObjectId('673cd95de0e8f8852f26afd4'), 'title': 'Главный тормозной цилиндр УРАЛ 5557-3510011( и под АБС)(без бачка)', 'article': '5557-3510011', 'manufacturer': 'УРАЛ', 'price': 13300}


In [167]:
# Import the second listing page; the displayed number is the count of newly
# inserted records.
parser.parse_data(source_url_2)

20

In [168]:
# Repeat the price query so the result also covers the records added by the
# second import.
docs = parser.get_details_with_greater_price(price_to_find)
parser.print_data(data=docs)

{'_id': ObjectId('673cd95de0e8f8852f26afd3'), 'title': 'Главный тормозной цилиндр УРАЛ 4320-3510011 (з.д.)(без бачка)', 'article': '4320-3510011', 'manufacturer': 'УРАЛ', 'price': 10500}
{'_id': ObjectId('673cd95de0e8f8852f26afd4'), 'title': 'Главный тормозной цилиндр УРАЛ 5557-3510011( и под АБС)(без бачка)', 'article': '5557-3510011', 'manufacturer': 'УРАЛ', 'price': 13300}
{'_id': ObjectId('673cd963e0e8f8852f26aff7'), 'title': 'Кронштейн CAR0001826A-F (DIECI)', 'article': 'CAR0001826A-F', 'manufacturer': 'DIECI', 'price': 98900}
{'_id': ObjectId('673cd963e0e8f8852f26aff9'), 'title': 'Картридж воздушного фильтра первичный CP24420 на Dieci AGRI FARMER', 'article': 'CP24420', 'manufacturer': 'MANN', 'price': 9000}
{'_id': ObjectId('673cd963e0e8f8852f26affd'), 'title': 'Пластина CUS0000054 (DIECI)', 'article': 'CUS0000054', 'manufacturer': 'DIECI', 'price': 16800}
{'_id': ObjectId('673cd963e0e8f8852f26afff'), 'title': 'Цепь BVS0861 (DIECI)', 'article': 'BVS0861', 'manufacturer': 'DIECI'