In [5]:
#pip install langchain_community

In [4]:
# pip install llama-index-llms-ollama 

In [14]:
# pip install ipywidgets

In [1]:
# Import necessary libraries
import os
from pathlib import Path
import pandas as pd

# Import langchain modules
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import UnstructuredExcelLoader, CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import OllamaEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Import additional libraries
import urllib3
import json

http = urllib3.PoolManager()


## Run command in the terminal
```
docker exec -ti apan-ollama ollama pull gemma3:4b
```

In [2]:
from pymongo import MongoClient
client = MongoClient('mongodb://admin:PassW0rd@apan-mongo:27017/')

In [3]:
db = client.reviewChew

In [4]:
collection = db.business

In [14]:
# Function to fetch records from API and process them
import datetime

def fetch_and_store_records():

    # timestamp = time.time()
    now = datetime.datetime.now()
    file_name = now.strftime("%Y%m%d%H%M%S")
    output_path = (f"summary_{file_name}.json")
    file_data = []
    if os.path.exists(output_path):
        with open(output_path, 'r') as file:
            file_data = json.load(file)

    # Fetch records from API
    startPage = 7
    itemsPerPage = 1000
    # response = http.request.get(f'http://apan-api:3100/api/v1/business/list?page={startPage}&items={itemsPerPage}&sortDir=ASC&sortBy=id')
    print('Retriving data from DB')
    response = http.request('GET', (f'http://apan-api:3100/api/v1/business/list?page={startPage}&items={itemsPerPage}&sortDir=ASC&sortBy=id'))

    if response.status == 200:
            
        # Open json to save to resonse data
        data = json.loads(response.data)
        
        print(f"New API data: {len(data['result'])} loaded.\n")
        if len(data['result']) > 0:
            print(f"First Data: {(data['result'][0])}\n")
            llm = Ollama(model="gemma3:12b", base_url="http://host.docker.internal:37869", verbose=True)

            for i, business in enumerate(data['result']):
                print(f"Summarizing.... \n")
                prompt = (
                    f"I have a restaurant raw data as json like this {business}. Please generate business summary text like this 'This Roast Coffeehouse and Wine Bar fun place with takeout options. prices are inexpensive, catering available and it has an average 4-star review and etc.. so on'. Use all the available information for the summary text and Do not add any comments. Return final summary text only."
                )
    
                response = llm.invoke(prompt)
    
                print(f"{i+1} -> Business ID {business['id']}: {response}\n")
                # Append new response
                # business.summary = response
                newData = business
                newData['summary'] = response
                file_data.append(newData)
                
                post_id = collection.insert_one(newData).inserted_id
                print('Post id: {0}\n'.format(post_id))
                # Write updated data back to the file
                with open(output_path, 'w') as file:
                    json.dump(file_data, file, indent=4, default=str)

    else:
        print(f"An error occurred: {response.status}")


# Call the function to fetch and store records
fetch_and_store_records()

Retriving data from DB
New API data: 1000 loaded.

First Data: {'id': 76106, 'uuid': 'e37e00f3-7736-8a0e-90c6-c663ae3725a5', 'name': 'Las Margaritas', 'address': '2538 Welsh Rd', 'city': 'Philadelphia', 'state': 'PA', 'postal_code': '19152', 'latitude': 40.0726066, 'longitude': -75.0341246, 'stars': 3.5, 'review_count': 511, 'is_open': 1, 'attributes': {'RestaurantsAttire': "'casual'", 'OutdoorSeating': 'False', 'Alcohol': "u'full_bar'", 'RestaurantsReservations': 'True', 'BYOBCorkage': "'no'", 'GoodForKids': 'True', 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2', 'BusinessParking': "{'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 'HasTV': 'True', 'Ambience': "{'romantic': False, 'intimate': False, 'touristy': False, 'hipster': False, 'divey': False, 'classy': False, 'trendy': False, 'upscale': False, 'casual': True}", 'RestaurantsTakeOut': 'True', 'NoiseLevel': "'average'", 'Caters': 'True', 'BikeParking': 'True', 'WiFi': "u'no'

In [22]:
collection.drop()

In [17]:
def loadDataAndInsertMongoDB():
    # Load data from json file
    # check summary_{file_name}.json files in backup_wl directory and load one by one and insert to MongoDB
    directory_path = "./backup_wl"
    files = os.listdir(directory_path)
   
    for file in files:
        print(f"file name: {file}")
        
        file_path = os.path.join(directory_path, file)
        if file != '.ipynb_checkpoints' and os.path.exists(file_path):
            with open(file_path, 'r') as data:
                records = json.load(data)
                print(f"file length: {len(records)}")
                for record in records:
                    # Check if the record already exists in the database
                    existing_record = collection.find_one({"uuid": record["uuid"]})
                    if not existing_record:
                        collection.insert_one(record)

loadDataAndInsertMongoDB()

file name: summary_20250414224457.json
file length: 100
file name: summary_20250403174249.json
file length: 100
file name: summary.json
file length: 100
file name: summary_20250404192316.json
file length: 100
file name: summary_20250406070630.json
file length: 100
file name: summary_20250414010531.json
file length: 1000
file name: summary_20250403222308.json
file length: 100
file name: summary_20250404160349.json
file length: 100
file name: summary_20250417004614.json
file length: 1000
file name: summary_20250414023754.json
file length: 100
file name: summary_20250414035403.json
file length: 100
file name: summary_20250414031944.json
file length: 100
file name: summary_20250404043812.json
file length: 100
file name: summary_20250414053335.json
file length: 100
file name: summary_20250405173750.json
file length: 100
file name: summary_20250414114836.json
file length: 1000
file name: summary_20250416030515.json
file length: 1000
file name: summary_20250414232245.json
file length: 100
fil

In [10]:
collection.count()

TypeError: 'Collection' object is not callable. If you meant to call the 'count' method on a 'Collection' object it is failing because no such method exists.

In [18]:
records = list(collection.find())
num_records = len(records)
num_records

10281