In [2]:
!pip install pdfplumber
!pip install openai==0.28.1
!pip install tiktoken==0.6.0
!pip install langchain==0.1.20
!pip install chromadb==0.5.0
!pip install faiss-cpu



In [3]:
import openai
import numpy as np
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from openai.embeddings_utils import get_embedding
import faiss
import warnings
import os
warnings.filterwarnings("ignore")
import pdfplumber

In [None]:
# Never store the API key in the notebook: take it from the environment and
# fall back to a hidden interactive prompt when it is not set.
if not os.environ.get('OPENAI_API_KEY', '').strip():
    from getpass import getpass  # local import: only needed for the fallback prompt
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ').strip()
openai.api_key = os.environ['OPENAI_API_KEY']

In [5]:
def extract_tables_from_pdf(pdf_path):
    """Collect every table pdfplumber finds in a PDF, page by page.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``pdfplumber.open``.

    Returns:
        A flat list containing the items returned by ``page.extract_tables()``
        for each page, concatenated in page order.
    """
    with pdfplumber.open(pdf_path) as document:
        return [
            found_table
            for pdf_page in document.pages
            for found_table in pdf_page.extract_tables()
        ]

In [9]:
# Pull every table out of the source PDF.
# NOTE(review): the absolute /content/ path presumably refers to a Colab upload;
# adjust it when running elsewhere.
tables = extract_tables_from_pdf("/content/Dentist.pdf")

In [11]:
# Inspect the raw extraction result.
# NOTE(review): this prints every row of every table; a small slice would keep
# the saved output readable.
print(tables)

[[['CLINIC NAME', 'CLINIC HOURS', 'CONTACT NO', 'STREET', 'BRGY', 'MUNICIPAL', 'PROVINCE', 'REGION'], ['ALIP DENTAL CLINIC', 'M-S 9:00AM-5:00PM BY APPOINTMENT', '(074) 448-8341, (0917) 627-2610', 'B108 JUNIPER BLDG.,', 'GEN. LUNA RD', 'BAGUIO CITY', 'BENGUET', 'CAR - CORDILLERA ADMINISTRATIVE REGION'], ['SMILE DENTAL CLINIC', 'M-S 9:00AM-5:00PM', '(074) 442-9028', 'UNIT 3-B / 5 & 6 PUSO', 'NSGES BSIAOGNU RIOO,AD', 'BAGUIO CITY', 'BENGUET', 'CAR - CORDILLERA ADMINISTRATIVE REGION'], ['THE DENTAL STUDIO BY: DR. KRISHA SY', 'M-S 9:00AM-6:00PM', '(0917) 506-9031', '2ND FLR. ECCO BLDG.', 'ASSUMPTION ROAD,', 'BAGUIO CITY', 'BENGUET', 'CAR - CORDILLERA ADMINISTRATIVE REGION'], ['GENOVEZA DENTAL CLINIC', 'M-S 9:00AM-6:00PM; SUN (BY APPOINTMENT)', '(074) 423-0947', 'GF 104 BALISONG BLDG', '., ASSUMPTION ROAD.,', 'BAGUIO CITY', 'BENGUET', 'CAR - CORDILLERA ADMINISTRATIVE REGION'], ['DR. ANNA LEAH TORRES-SISON DENTAL CLINIC', 'M-S 10:AM-5:00PM BY APPOINTMENT', '(074) 442-4040, (0917) 560-4562', '

In [12]:
# Build one DataFrame per extracted table, using each table's first row as its
# header, then stack them so that no table is dropped (previously only the last
# table survived in `dataframed`).
# NOTE(review): this assumes every table repeats the header row; a table that
# continues from a previous page without one would need separate handling.
dataframes = [
    pd.DataFrame(table[1:], columns=table[0])
    for table in tables
    if table  # skip empty tables, which have no header row to read
]
if not dataframes:
    # Fail here with a clear message rather than later on a None DataFrame.
    raise ValueError("No tables were extracted from the PDF")
dataframed = pd.concat(dataframes, ignore_index=True)

In [13]:
# Display the DataFrame built from the extracted tables.
dataframed

Unnamed: 0,CLINIC NAME,CLINIC HOURS,CONTACT NO,STREET,BRGY,MUNICIPAL,PROVINCE,REGION
0,ALIP DENTAL CLINIC,M-S 9:00AM-5:00PM BY APPOINTMENT,"(074) 448-8341, (0917) 627-2610","B108 JUNIPER BLDG.,",GEN. LUNA RD,BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION
1,SMILE DENTAL CLINIC,M-S 9:00AM-5:00PM,(074) 442-9028,UNIT 3-B / 5 & 6 PUSO,"NSGES BSIAOGNU RIOO,AD",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION
2,THE DENTAL STUDIO BY: DR. KRISHA SY,M-S 9:00AM-6:00PM,(0917) 506-9031,2ND FLR. ECCO BLDG.,"ASSUMPTION ROAD,",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION
3,GENOVEZA DENTAL CLINIC,M-S 9:00AM-6:00PM; SUN (BY APPOINTMENT),(074) 423-0947,GF 104 BALISONG BLDG,"., ASSUMPTION ROAD.,",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION
4,DR. ANNA LEAH TORRES-SISON DENTAL CLINIC,M-S 10:AM-5:00PM BY APPOINTMENT,"(074) 442-4040, (0917) 560-4562","RM. A1, 3/F DOÑA ANIT","A#6 B6L DSEGS.S 2IO,N ROAD",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION
...,...,...,...,...,...,...,...,...
161,GDH DENTAL CLINIC,M-S 10:00AM-6:00PM BY APPOINTMENT,(02) 410-2257,10-A STO. NINO ST.,"BRGY. SAN ANTONIO, S",AQNU FERZAONNC CISICTOY DEL M,ODNITSETRICT 2,NCR - NATIONAL CAPITAL REGION
162,DENTAL CARE CORNER,M-S 10:00AM-7:00PM,"(02) 928-0256, (02) 957-4416, (0906) 305-6031","2/F UNIT A, 56 VISAYAS",ABVREGNYU. VEASRA,QUEZON CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION
163,DR. FLORENCE A. DE VENECIA DENTAL CLINIC - UCP...,M-S 9:00AM-5:00PM BY APPOINTMENT,"(02) 736-7142, (02) 924-2369, (02) 990-2373, (...",RM 403 UCPI (KALAYAA,"ND)I,L IMMAAGNI,NOO ST.,",QUEZON CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION
164,BUHAIN DENTAL CLINIC,M-S 9:00AM-5:00PM STRICTLY BY APPOINTMENT,"(02) 434-8181, (02) 435-3556, (0927) 498-5239",UNIT 102 EAGLE STAR C,OLONYDOOL.A 2 H5 EFIAGBHIATSN DELA,QRUOESZAO SNT. CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION


In [14]:
# Join every original column of a row into one space-separated string; this is
# the text that gets embedded. An existing 'combined' column is excluded so
# that re-running the cell does not fold the previous result into itself.
source_columns = [column for column in dataframed.columns if column != 'combined']
dataframed['combined'] = dataframed[source_columns].apply(
    lambda row: ' '.join(row.values.astype(str)), axis=1
)
dataframed

Unnamed: 0,CLINIC NAME,CLINIC HOURS,CONTACT NO,STREET,BRGY,MUNICIPAL,PROVINCE,REGION,combined
0,ALIP DENTAL CLINIC,M-S 9:00AM-5:00PM BY APPOINTMENT,"(074) 448-8341, (0917) 627-2610","B108 JUNIPER BLDG.,",GEN. LUNA RD,BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION,ALIP DENTAL CLINIC M-S 9:00AM-5:00PM BY APPOIN...
1,SMILE DENTAL CLINIC,M-S 9:00AM-5:00PM,(074) 442-9028,UNIT 3-B / 5 & 6 PUSO,"NSGES BSIAOGNU RIOO,AD",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION,SMILE DENTAL CLINIC M-S 9:00AM-5:00PM (074) 44...
2,THE DENTAL STUDIO BY: DR. KRISHA SY,M-S 9:00AM-6:00PM,(0917) 506-9031,2ND FLR. ECCO BLDG.,"ASSUMPTION ROAD,",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION,THE DENTAL STUDIO BY: DR. KRISHA SY M-S 9:00AM...
3,GENOVEZA DENTAL CLINIC,M-S 9:00AM-6:00PM; SUN (BY APPOINTMENT),(074) 423-0947,GF 104 BALISONG BLDG,"., ASSUMPTION ROAD.,",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION,GENOVEZA DENTAL CLINIC M-S 9:00AM-6:00PM; SUN ...
4,DR. ANNA LEAH TORRES-SISON DENTAL CLINIC,M-S 10:AM-5:00PM BY APPOINTMENT,"(074) 442-4040, (0917) 560-4562","RM. A1, 3/F DOÑA ANIT","A#6 B6L DSEGS.S 2IO,N ROAD",BAGUIO CITY,BENGUET,CAR - CORDILLERA ADMINISTRATIVE REGION,DR. ANNA LEAH TORRES-SISON DENTAL CLINIC M-S 1...
...,...,...,...,...,...,...,...,...,...
161,GDH DENTAL CLINIC,M-S 10:00AM-6:00PM BY APPOINTMENT,(02) 410-2257,10-A STO. NINO ST.,"BRGY. SAN ANTONIO, S",AQNU FERZAONNC CISICTOY DEL M,ODNITSETRICT 2,NCR - NATIONAL CAPITAL REGION,GDH DENTAL CLINIC M-S 10:00AM-6:00PM BY APPOIN...
162,DENTAL CARE CORNER,M-S 10:00AM-7:00PM,"(02) 928-0256, (02) 957-4416, (0906) 305-6031","2/F UNIT A, 56 VISAYAS",ABVREGNYU. VEASRA,QUEZON CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION,DENTAL CARE CORNER M-S 10:00AM-7:00PM (02) 928...
163,DR. FLORENCE A. DE VENECIA DENTAL CLINIC - UCP...,M-S 9:00AM-5:00PM BY APPOINTMENT,"(02) 736-7142, (02) 924-2369, (02) 990-2373, (...",RM 403 UCPI (KALAYAA,"ND)I,L IMMAAGNI,NOO ST.,",QUEZON CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION,DR. FLORENCE A. DE VENECIA DENTAL CLINIC - UCP...
164,BUHAIN DENTAL CLINIC,M-S 9:00AM-5:00PM STRICTLY BY APPOINTMENT,"(02) 434-8181, (02) 435-3556, (0927) 498-5239",UNIT 102 EAGLE STAR C,OLONYDOOL.A 2 H5 EFIAGBHIATSN DELA,QRUOESZAO SNT. CITY,DISTRICT 2,NCR - NATIONAL CAPITAL REGION,BUHAIN DENTAL CLINIC M-S 9:00AM-5:00PM STRICTL...


In [15]:
# One plain-Python string per row; list positions line up with the row order
# of `dataframed`.
documents = dataframed['combined'].to_list()

In [16]:
# Embed each document string separately (one get_embedding call per document),
# keeping the same order as `documents`.
embeddings = list(
    map(
        lambda text: get_embedding(text, engine='text-embedding-3-small'),
        documents,
    )
)

In [17]:
# Vector length, taken from the first embedding; used to size the FAISS index.
embedding_dim = len(embeddings[0])

In [18]:
# Stack the embeddings into a single float32 array, one row per document.
embedding_np = np.asarray(embeddings, dtype='float32')

In [19]:
# Create an empty FAISS flat L2 index sized for vectors of length embedding_dim.
index = faiss.IndexFlatL2(embedding_dim)

In [20]:
# Add all document vectors to the index.
# NOTE(review): re-running this cell without re-creating `index` presumably
# appends the same vectors a second time — verify before re-executing.
index.add(embedding_np)

In [21]:
# The question to answer; it is embedded for retrieval and inserted into the prompt.
user_message = "Where is the Alip Dental Clinic?"

In [22]:
# Embed the question with the same engine used for the documents.
query_embedding = get_embedding(
    user_message,
    engine='text-embedding-3-small',
)
# Wrap it in an outer list so the array is 2-D with a single row, then store as float32.
query_embedding_np = np.asarray([query_embedding], dtype='float32')

In [23]:
# Ask the index for the single closest match (k=1). The first return value
# (presumably the distances) is discarded; `indices` holds one row per query.
_, indices = index.search(query_embedding_np, 1)

In [24]:
# Map the returned positions back to the source text. FAISS pads the result
# with -1 when it has fewer than k vectors to return; without the guard,
# documents[-1] would silently yield the last row instead of "no match".
retrieved_docs = [documents[i] for i in indices[0] if i >= 0]

In [25]:
# Merge the retrieved documents into one space-separated context string.
context = ' '.join(retrieved_docs)

In [26]:
# Prompt layout: the retrieved context first, then the user's question, then an
# empty "Response:" slot for the model to complete.
structured_prompt = f'Context: \n{context} \n\n Query: {user_message} \n\n Response:'

In [27]:
# Show the final prompt exactly as it will be sent to the model.
print(structured_prompt)

Context: 
ALIP DENTAL CLINIC M-S 9:00AM-5:00PM BY APPOINTMENT (074) 448-8341, (0917) 627-2610 B108 JUNIPER BLDG., GEN. LUNA RD BAGUIO CITY BENGUET CAR - CORDILLERA ADMINISTRATIVE REGION 

 Query: Where is the Alip Dental Clinic? 

 Response:


In [31]:
# Running chat history, seeded with the system message; later cells append to it.
struct = [{"role": "system", "content": "You are a helpful assistant"}]

In [32]:
# Send the history plus the context-augmented prompt to the chat model.
chat = openai.ChatCompletion.create(
    model="gpt-4o-mini",
    messages=[*struct, {"role": "user", "content": structured_prompt}],
    temperature=0.5,
    max_tokens=1500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)
# The history records the raw question (not the augmented prompt) ...
struct.append(dict(role="user", content=user_message))
# ... followed by the text of the first returned choice.
response = chat.choices[0].message.content
struct.append(dict(role="assistant", content=response))

In [33]:
# Show the model's answer.
print(response)

The Alip Dental Clinic is located at B108 Juniper Building, Gen. Luna Road, Baguio City, Benguet, in the Cordillera Administrative Region of the Philippines.
