In [1]:
# RAG_System.ipynb

# ============================
# 1. Install Required Packages
# ============================
# You might already have some or all of these. If so, you can skip or comment them out.
# %pip install langchain transformers chromadb sentence-transformers accelerate bitsandbytes  # etc.

import os
import torch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.prompts import PromptTemplate
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

import shutil
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import gc
from tqdm import tqdm, trange


gc.collect()
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "mps" 

In [3]:
# ============================
# 2. Configuration
# ============================
# Path to data folder
TEXT_DATA_PATH = ["../data/zianp", "../data/dunhanj"] 
ROW_EVENT_PATH = ['../data/nicolaw']
STATIC_WEB_CSV_PATH = '../data/texts_urls_filtered.csv'
custom_cache_dir = "/mnt/new_volume"

# Choose an embedding model.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_INPUT = "qwen2"
# Choose a local LLM model.
# LLM_MODEL_ID = "tiiuae/falcon-7b-instruct"
# LLM_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
LLM_MODEL_MAP = {"falcon": "tiiuae/falcon-7b-instruct"
                ,"llama3": "meta-llama/Llama-3.1-8B-Instruct"
                ,"deepseek-r1": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
                ,"phi-4": "unsloth/phi-4-bnb-4bit"
                ,"qwen2": "Qwen/Qwen2-7B-Instruct"
                ,"qwq":"Qwen/QwQ-32B"}

LLM_MODEL_ID = LLM_MODEL_MAP[LLM_INPUT]
# LLM_MODEL_ID = "unsloth/phi-4-bnb-4bit"

LLM_NAME = LLM_MODEL_ID.split("/")[-1]
data_file = "qa400"
test_data_path ="../annotations/{}.csv".format(data_file)

retriever_top_k = 10
CHUNK_SIZE = 512  
CHUNK_OVERLAP = 100
RELOAD_VECTORS_DB = True



In [4]:
# Classify files in the folder

files_txt_path = []
files_csv_path = []
files_event_path = []

for DATA_PATH in TEXT_DATA_PATH:
    for root, dirs, files in os.walk(DATA_PATH):
        for file in files:
            if file.endswith('.txt'):
                files_txt_path.append(os.path.join(root, file))
            elif file.endswith('.csv'):
                files_csv_path.append(os.path.join(root, file))

for DATA_PATH in ROW_EVENT_PATH:
    for root, dirs, files in os.walk(DATA_PATH):
        for file in files:
            if file.endswith('.txt'):
                files_event_path.append(os.path.join(root, file))




In [5]:

# ============================
# 2. Load Files with Different Strategies
# ============================
all_documents = []

# Load Dunhan CSV
test_df = pd.read_csv(STATIC_WEB_CSV_PATH)
for index, row in test_df.iterrows():
    
    all_documents.append(Document(page_content=row['TEXT'], metadata={"source": row['URL']}))

# Load all files in the directory
for file_path in files_txt_path:
    loader = TextLoader(file_path, encoding="utf-8")
    doc = loader.load()  # Load entire file as one document
    all_documents.append(Document(page_content=doc[0].page_content, metadata={"source": file_path}))

for file_path in files_csv_path:
    df = pd.read_csv(file_path)
    filename = os.path.basename(file_path)
    for index, row in df.iterrows():
        row_text = f"{filename} | " + " | ".join(f"{col}: {row[col]}" for col in df.columns)
        metadata = {"source": filename, "row_id": index}
        all_documents.append(Document(page_content=row_text, metadata=metadata))


# OPTIOANL function for processing files row by row
    # ✅ Load row by row (structured data)
for file_path in files_event_path:
    with open(file_path, "r", encoding="utf-8") as file:
        for row_id, line in enumerate(file):
            line = line.strip()
            if line:  # Ignore empty lines
                all_documents.append(Document(page_content=line, metadata={"source": filename, "row_id": row_id}))


print(f"Loaded {len(all_documents)} raw documents from {len(os.listdir(DATA_PATH))} files.")

# ============================
# 3. Split Longer Documents for Better Retrieval
# ============================
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", " ", ""]
)

split_documents = []
for doc in all_documents:
    chunks = text_splitter.split_text(doc.page_content)  # Split if needed
    for chunk in chunks:
        split_documents.append(Document(page_content=chunk, metadata=doc.metadata))

print(f"Total {len(split_documents)} final chunks prepared for vector storage.")


Loaded 8540 raw documents from 13 files.
Total 99280 final chunks prepared for vector storage.


In [6]:

# ============================
# 4. Create Embeddings
# ============================
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, cache_folder = custom_cache_dir)
print("Embeddings loaded successfully.")

# ============================
# 5. Manage Vector Store
# ============================
persist_directory = "chroma_db"

# Check if the vector store exists and delete it if necessary
if RELOAD_VECTORS_DB:

    if os.path.exists(persist_directory):
        print("Vector store exists. Deleting existing database...")
        shutil.rmtree(persist_directory)  # Deletes the existing database folder

    # Recreate the vector store
    vectorstore = Chroma.from_documents(
        documents=split_documents,
        embedding=embeddings,
        persist_directory=persist_directory
    )
else:
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    print("Local Vector store loaded successfully.")

vectorstore.persist()
print("Vector store recreated and persisted.")



  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, cache_folder = custom_cache_dir)


Embeddings loaded successfully.
Vector store exists. Deleting existing database...
Vector store recreated and persisted.


  vectorstore.persist()


In [7]:

# ============================
# 6. Set Up the LLM (Falcon 7B Instruct)
# ============================
# Load the tokenizer and model
print(f"Loading {LLM_MODEL_ID}; this may take some time...")
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True, cache_dir=custom_cache_dir)
tokenizer.pad_token = tokenizer.eos_token  
model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL_ID,
    torch_dtype=torch.float16,
    device_map= device,           # automatically place model layers on available GPU
    trust_remote_code=True,
    cache_dir=custom_cache_dir
)


Loading Qwen/Qwen2-7B-Instruct; this may take some time...


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards:   0%|          | 0/4 [00:27<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Create a text-generation pipeline
pipeline_llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=20,
    temperature= 0.1,       # Lower temperature for more factual answers
    top_p=0.9,
    repetition_penalty=1.2,
    do_sample=True,
)

# Wrap the pipeline in a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipeline_llm)


# Customized Prompt

QA_Prompt = """
You are an expert assistant answering factual questions about Pittsburgh or Carnegie Mellon University (CMU). 
Use the retrieved context to give a detailed and helpful answer. If the provided context does not contain the answer, leverage your pretraining knowledge to provide the correct answer. 

Important Instructions:
- Answer concisely without repeating the question.
- Use the provided context if relevant; otherwise, rely on your pretraining knowledge.
- Do **not** use complete sentences. Provide only the word, name, date, or phrase that directly answers the question. For example, given the question "When was Carnegie Mellon University founded?", you should only answer "1900".

Retrieved Context:
---
{context}
---

Examples:

Question: In less than 5 words, Who is Pittsburgh named after? 
Answer: William Pitt \n
Question: In less than 5 words, What famous machine learning venue had its first conference in Pittsburgh in 1980? 
Answer: ICML \n
Question: In less than 5 words, What musical artist is performing at PPG Arena on October 13? 
Answer: Billie Eilish \n

Now it's your turn. Please answer the following question based on the above context. Remember to answer as short as possible. 

Question: In less than 5 words, {question} \n\n
Answer:
"""

custom_prompt = PromptTemplate(template=QA_Prompt, input_variables=["context", "question"])


# ============================
# 7. Create the RetrievalQA Chain
# ============================
retriever = vectorstore.as_retriever(search_kwargs={"k": retriever_top_k})


def ask_question(query: str):
    """
    Run a query through the RAG pipeline and return the generated answer along with the source documents.
    
    Args:
        query (str): The user’s question.

    Returns:
        answer (str): The generated answer.
        sources (list): List of retrieved documents used to generate the answer.
    """
    # Retrieve relevant documents
    retrieved_docs = retriever.get_relevant_documents(query)
    # print(f"Retrieved {len(retrieved_docs)} documents.")
    
    # Extract text from retrieved documents
    context = "\n\n".join([doc.page_content for doc in retrieved_docs])
    # print(f"Context length: {len(context)} characters.")
    # print('------ START CONTEXT ------')
    # print(context)
    # print('------ END CONTEXT ------')

    # Format the input using the QA_Prompt
    formatted_prompt = QA_Prompt.format(context=context, question=query)
    
    # Generate response using the LLM
    result = llm(formatted_prompt)  # Pass the fully formatted input
    answer = result.replace(formatted_prompt, "").strip()
    # Extract answer and sources
    answer = answer.strip()  # Ensure clean output
    return answer, retrieved_docs  # Return both answer and retrieved documents


Device set to use cuda
  llm = HuggingFacePipeline(pipeline=pipeline_llm)


In [9]:
# QA_Prompt.format(context='d', question='2')

In [10]:
df = pd.read_csv(test_data_path)

In [11]:

questions = []
references = []
answers = []
sources = []
errors = []
full = df.shape[0]
subset = full

for i in trange(full):
    row = df.iloc[i]

    answer = "I don't know."

    try:
        answer, retrieved_docs = ask_question(row['question'])
    except:
        errors.append((row['question']))
        continue
    # print(answer)
    answer = answer.strip()
    print(answer)
    answer = answer.split('\n')[0]
    answers.append(answer)
    questions.append(row['question'])
    sources.append(retrieved_docs)
    references.append(row['reference_answer'])


df_ans = pd.DataFrame({'question': questions, 'answer': answers, 'reference_answer': references, 'source': sources})


  retrieved_docs = retriever.get_relevant_documents(query)
  result = llm(formatted_prompt)  # Pass the fully formatted input
  0%|          | 1/435 [00:02<21:12,  2.93s/it]

William Pitt


  0%|          | 2/435 [00:03<10:10,  1.41s/it]

1900


  1%|          | 3/435 [00:03<07:05,  1.02it/s]

J&L Steel Company bridge


  1%|          | 4/435 [00:04<05:15,  1.37it/s]

Richard Socher


  1%|          | 5/435 [00:04<04:29,  1.59it/s]

The Pittsburgh Downtown Partnership


  1%|▏         | 6/435 [00:04<03:46,  1.89it/s]

Cultural District


  2%|▏         | 7/435 [00:05<03:13,  2.21it/s]

Kevin McMahon


  2%|▏         | 8/435 [00:05<02:52,  2.48it/s]

Individual Giving


  2%|▏         | 9/435 [00:05<02:43,  2.60it/s]

Please contact us


  2%|▏         | 10/435 [00:06<02:33,  2.77it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Gift Illustrator


  3%|▎         | 11/435 [00:06<02:44,  2.58it/s]

Steelers Hall of Honor Museum


  3%|▎         | 12/435 [00:06<02:42,  2.60it/s]

Acrisure Stadium


  3%|▎         | 13/435 [00:07<02:36,  2.69it/s]

Roberto Clemente


  3%|▎         | 14/435 [00:07<02:23,  2.93it/s]

Penn Brewery


  3%|▎         | 15/435 [00:07<02:32,  2.76it/s]

City Brew Tours Pittsburgh Trail


  4%|▎         | 16/435 [00:08<02:37,  2.65it/s]

Congregation Beth Shalom


  4%|▍         | 17/435 [00:08<02:31,  2.75it/s]

David L.


---


  4%|▍         | 18/435 [00:09<02:28,  2.81it/s]

David Linton


  4%|▍         | 19/435 [00:09<02:37,  2.64it/s]

Big Nosh Jewish Food Festival


  5%|▍         | 20/435 [00:09<02:27,  2.81it/s]

Steel Tree Fund


  5%|▍         | 21/435 [00:10<02:24,  2.86it/s]

Allegheny River


  5%|▌         | 22/435 [00:10<02:18,  2.98it/s]

None mentioned


  5%|▌         | 23/435 [00:10<02:22,  2.89it/s]

Carnegie Mellon University


  6%|▌         | 24/435 [00:11<02:17,  2.99it/s]

Andrew Carnegie


  6%|▌         | 25/435 [00:11<02:17,  2.99it/s]

Oakland Campus


  6%|▌         | 26/435 [00:11<02:30,  2.72it/s]

accessibility@nfl.com


  6%|▌         | 27/435 [00:12<02:20,  2.90it/s]

Standards


  6%|▋         | 28/435 [00:12<02:29,  2.72it/s]

Steelers Official Mobile App


  7%|▋         | 29/435 [00:12<02:29,  2.71it/s]

Acrisure Stadium


  7%|▋         | 30/435 [00:13<02:42,  2.49it/s]

Steeler Youth Football Camps


  7%|▋         | 31/435 [00:14<03:40,  1.83it/s]

5000 Forbes Avenue


Given the context, which university has been recognized for its contributions to


  7%|▋         | 32/435 [00:14<03:14,  2.07it/s]

Fort Pitt Bridge


  8%|▊         | 33/435 [00:15<03:05,  2.17it/s]

Graham Neubig


  8%|▊         | 34/435 [00:15<03:06,  2.15it/s]

How Fabulous is That!


  8%|▊         | 35/435 [00:15<02:49,  2.35it/s]

1900


  8%|▊         | 36/435 [00:16<02:57,  2.25it/s]

Carnegie Medalist Pageant


  9%|▊         | 37/435 [00:16<02:54,  2.29it/s]

Carnegie Mellon University


  9%|▊         | 38/435 [00:17<02:38,  2.51it/s]

Latrobe


  9%|▉         | 39/435 [00:17<02:34,  2.56it/s]

Banana Split Fest


  9%|▉         | 40/435 [00:18<03:33,  1.85it/s]

Protesters march


Question: On what specific date did the protest take place?
Answer: Mar


  9%|▉         | 41/435 [00:18<03:26,  1.91it/s]

Carnegie Museum of Natural History


 10%|▉         | 42/435 [00:19<03:10,  2.06it/s]

John Dvorak


 10%|▉         | 43/435 [00:19<02:52,  2.28it/s]

PNC Park


 10%|█         | 44/435 [00:19<02:48,  2.32it/s]

Pittsburgh Improv


 10%|█         | 45/435 [00:20<02:50,  2.29it/s]

Diary of a Wombat


 11%|█         | 46/435 [00:20<02:47,  2.32it/s]

Dahshur boat


 11%|█         | 47/435 [00:21<02:53,  2.24it/s]

"Treasures from Ancient Egypt"


 11%|█         | 48/435 [00:21<02:44,  2.35it/s]

Mostafa Sherif


 11%|█▏        | 49/435 [00:22<02:42,  2.38it/s]

National Museum of Egyptian Civilization


 11%|█▏        | 50/435 [00:22<02:40,  2.39it/s]

Carnegie Mellon University


 12%|█▏        | 51/435 [00:22<02:43,  2.35it/s]

early 2024


 12%|█▏        | 52/435 [00:23<02:23,  2.67it/s]

Two


 12%|█▏        | 53/435 [00:23<02:22,  2.67it/s]

Baker Music School


 12%|█▏        | 54/435 [00:23<02:22,  2.68it/s]

Lullaby Project


 13%|█▎        | 55/435 [00:24<02:12,  2.87it/s]

ten


 13%|█▎        | 56/435 [00:24<02:10,  2.90it/s]

Double Wide Grill


 13%|█▎        | 57/435 [00:25<02:20,  2.68it/s]

Full Pint Beer


 13%|█▎        | 58/435 [00:25<02:20,  2.68it/s]

1910


 14%|█▎        | 59/435 [00:25<02:28,  2.53it/s]

"Pittsburgh City Paper"


 14%|█▍        | 60/435 [00:26<02:20,  2.68it/s]

Fat Tuesday


 14%|█▍        | 61/435 [00:26<02:15,  2.76it/s]

New Englanders


 14%|█▍        | 62/435 [00:26<02:24,  2.57it/s]

More than four-fifths


 14%|█▍        | 63/435 [00:27<02:26,  2.55it/s]

White Trillium


 15%|█▍        | 64/435 [00:27<02:14,  2.75it/s]

Cleveland


 15%|█▍        | 65/435 [00:28<02:14,  2.75it/s]

Carnegie Mellon University


 15%|█▌        | 66/435 [00:28<02:31,  2.44it/s]

Mamma Mia Bar-Pittsburgh


 15%|█▌        | 67/435 [00:28<02:26,  2.51it/s]

2025


 16%|█▌        | 68/435 [00:29<02:21,  2.59it/s]

Pittsburgh Renaissance Project


 16%|█▌        | 69/435 [00:29<02:21,  2.59it/s]

Fourth Floor, Floor 2


 16%|█▌        | 70/435 [00:30<02:16,  2.66it/s]

Pittsburgh Downtown Partnership


 16%|█▋        | 71/435 [00:30<03:17,  1.84it/s]

Community Impact Awards


Given the context below, write a new sentence using information from the text.

The


 17%|█▋        | 72/435 [00:31<03:01,  1.99it/s]

"a feminist journey"


 17%|█▋        | 73/435 [00:31<02:48,  2.15it/s]

Rob Brezsny


 17%|█▋        | 74/435 [00:32<02:26,  2.46it/s]

University of Pittsburgh


 17%|█▋        | 75/435 [00:32<02:19,  2.58it/s]

Community Awards Ceremony


 17%|█▋        | 76/435 [00:32<02:17,  2.61it/s]

November 22


 18%|█▊        | 77/435 [00:33<02:11,  2.71it/s]

University of Pittsburgh


 18%|█▊        | 78/435 [00:33<02:28,  2.41it/s]

Wild Illuminations: A Holiday Lantern Experience


 18%|█▊        | 79/435 [00:33<02:26,  2.43it/s]

2001


 18%|█▊        | 80/435 [00:34<02:18,  2.57it/s]

Summer Crawl


 19%|█▊        | 81/435 [00:34<02:07,  2.77it/s]

Pittsburgh


 19%|█▉        | 82/435 [00:35<02:11,  2.69it/s]

Pittsburgh Steelers


 19%|█▉        | 83/435 [00:35<02:10,  2.70it/s]

Mike Tomlin


 19%|█▉        | 84/435 [00:35<02:13,  2.62it/s]

Carnegie Mellon University


 20%|█▉        | 85/435 [00:36<02:20,  2.50it/s]

Carnegie Museum of Art


 20%|█▉        | 86/435 [00:36<02:04,  2.79it/s]

Pirates


 20%|██        | 87/435 [00:36<02:06,  2.76it/s]

Carnegie Mellon University


 20%|██        | 88/435 [00:37<02:02,  2.83it/s]

Sixth Street


 20%|██        | 89/435 [00:37<02:03,  2.80it/s]

Pittsburgh Pirates game


 21%|██        | 90/435 [00:37<02:05,  2.75it/s]

Pittsburgh Pirates


 21%|██        | 91/435 [00:38<02:08,  2.67it/s]

Kresge Theater


 21%|██        | 92/435 [00:38<02:12,  2.60it/s]

John Marcinizyn


 21%|██▏       | 93/435 [00:39<02:10,  2.63it/s]

Program: TBA


 22%|██▏       | 94/435 [00:39<02:19,  2.44it/s]

5000 Forbes Avenue


 22%|██▏       | 95/435 [00:39<02:17,  2.48it/s]

Kresge Theater


 22%|██▏       | 96/435 [00:40<02:13,  2.55it/s]

Carnegie Tech


 22%|██▏       | 97/435 [00:40<02:06,  2.67it/s]

Three Sisters bridges


 23%|██▎       | 98/435 [00:41<02:17,  2.45it/s]

Arthur J. Rooney, Sr.


 23%|██▎       | 99/435 [00:41<02:13,  2.51it/s]

Schenley Park


 23%|██▎       | 100/435 [00:41<02:10,  2.56it/s]

Gabe Zichermann


 23%|██▎       | 101/435 [00:42<02:05,  2.66it/s]

Golden Triangle Bridge


 23%|██▎       | 102/435 [00:42<02:04,  2.68it/s]

Carnegie Mellon University


 24%|██▎       | 103/435 [00:43<02:04,  2.67it/s]

Henry Clay Frick


 24%|██▍       | 104/435 [00:43<02:13,  2.48it/s]

How Fabulous is That!


 24%|██▍       | 105/435 [00:43<02:06,  2.61it/s]

Downtown


 24%|██▍       | 106/435 [00:44<02:09,  2.55it/s]

Garage at Theater Square


 25%|██▍       | 107/435 [00:44<02:15,  2.42it/s]

Pittsburgh Ballet Theatre


 25%|██▍       | 108/435 [00:45<02:11,  2.48it/s]

Arcade Comedy Theater


 25%|██▌       | 109/435 [00:45<02:08,  2.55it/s]

803 Liberty Avenue


 25%|██▌       | 110/435 [00:45<01:54,  2.84it/s]

Andrew Mellon


 26%|██▌       | 111/435 [00:46<01:55,  2.81it/s]

答案：Tom Mitchell


 26%|██▌       | 112/435 [00:46<02:04,  2.60it/s]

Lords Of The Sound Orchestra


 26%|██▌       | 113/435 [00:46<02:01,  2.66it/s]

Tim Rice


 26%|██▌       | 114/435 [00:47<02:12,  2.42it/s]

Greer Cabaret Theater


 26%|██▋       | 115/435 [00:47<01:59,  2.68it/s]

University of Pittsburgh


 27%|██▋       | 116/435 [00:48<02:54,  1.83it/s]

"Jesus Christ Superstar"
The answer can be found within the text where it mentions "Jesus Christ


 27%|██▋       | 117/435 [00:48<02:35,  2.05it/s]

Carnegie Mellon University


 27%|██▋       | 118/435 [00:49<02:16,  2.32it/s]

Pittsburgh


 27%|██▋       | 119/435 [00:49<02:00,  2.61it/s]

Technology Park


 28%|██▊       | 120/435 [00:49<02:07,  2.47it/s]

Carnegie Museum of Art


 28%|██▊       | 121/435 [00:50<02:03,  2.54it/s]

Carnegie magazine


 28%|██▊       | 122/435 [00:50<02:13,  2.35it/s]

La Traviata


 28%|██▊       | 123/435 [00:51<02:10,  2.38it/s]

Benedum Center


 29%|██▊       | 124/435 [00:51<02:05,  2.49it/s]

Joseph Haydn


 29%|██▊       | 125/435 [00:52<02:05,  2.47it/s]

Carnegie Mellon University Foundation


 29%|██▉       | 126/435 [00:52<02:15,  2.28it/s]

Pittsburgh Opera's mobile app


 29%|██▉       | 127/435 [00:52<01:58,  2.59it/s]

Home


 29%|██▉       | 128/435 [00:53<01:46,  2.87it/s]

Soft


 30%|██▉       | 129/435 [00:53<01:51,  2.73it/s]

Carnegie Mellon University


 30%|██▉       | 130/435 [00:53<01:41,  3.00it/s]

Home


 30%|███       | 131/435 [00:54<01:41,  3.00it/s]

Pittsburgh


 30%|███       | 132/435 [00:54<01:54,  2.64it/s]

Pittsburgh City-County Building


 31%|███       | 133/435 [00:55<02:09,  2.33it/s]

Bureau of Environmental Services


 31%|███       | 134/435 [00:55<02:04,  2.43it/s]

Community Paramedicine Program


 31%|███       | 135/435 [00:55<01:59,  2.50it/s]

Carnegie Mellon University


 31%|███▏      | 136/435 [00:56<01:50,  2.71it/s]

Golden Triangle


 31%|███▏      | 137/435 [00:56<01:50,  2.69it/s]

Pittsburgh Steelers


 32%|███▏      | 138/435 [00:56<01:47,  2.77it/s]

Women's Business Center


 32%|███▏      | 139/435 [00:57<01:44,  2.82it/s]

Greenfield Booksellers


 32%|███▏      | 140/435 [00:57<01:43,  2.86it/s]

The Colombian Spot


 32%|███▏      | 141/435 [00:57<01:38,  2.98it/s]

Eric Baker


 33%|███▎      | 142/435 [00:58<01:50,  2.64it/s]

Carnegie Museums of Pittsburgh


 33%|███▎      | 143/435 [00:58<01:43,  2.81it/s]

90


 33%|███▎      | 144/435 [00:59<01:50,  2.62it/s]

Palmer & Hornbostel


 33%|███▎      | 145/435 [00:59<01:55,  2.50it/s]

Carnegie Museums of Pittsburgh


 34%|███▎      | 146/435 [00:59<01:52,  2.56it/s]

Carnegie Mellon University


 34%|███▍      | 147/435 [01:00<02:38,  1.81it/s]

Over sixty


Given the context, which includes information about various Pittsburgh breweries such as Brewing Company, Cob


 34%|███▍      | 148/435 [01:01<02:18,  2.07it/s]

Pirates.com


 34%|███▍      | 149/435 [01:01<02:21,  2.02it/s]

No specific person mentioned for current manager position


 34%|███▍      | 150/435 [01:01<02:07,  2.24it/s]

PNC Park


 35%|███▍      | 151/435 [01:02<02:06,  2.24it/s]

Spencer Horwitz


 35%|███▍      | 152/435 [01:03<02:27,  1.92it/s]

"Top 30 Prospects list"
---


 35%|███▌      | 153/435 [01:03<02:16,  2.06it/s]

"Pittsburgh Marathon"


 35%|███▌      | 154/435 [01:03<02:06,  2.22it/s]

Pittsburgh Downtown Partnership


 36%|███▌      | 155/435 [01:04<02:02,  2.28it/s]

St. Patrick's Day Parade


 36%|███▌      | 156/435 [01:04<02:21,  1.98it/s]

Inaugural Pittsburgh Parade Day Dash Presented by GNC


 36%|███▌      | 157/435 [01:05<02:05,  2.22it/s]

GNC


 36%|███▋      | 158/435 [01:05<02:07,  2.17it/s]

History Maker Honorees Awards Dinner


 37%|███▋      | 159/435 [01:06<02:01,  2.27it/s]

Juan Diego


 37%|███▋      | 160/435 [01:06<01:56,  2.37it/s]

Heinz History Center


 37%|███▋      | 161/435 [01:06<01:51,  2.45it/s]

Atria's


 37%|███▋      | 162/435 [01:07<01:42,  2.66it/s]

Laura Early


 37%|███▋      | 163/435 [01:07<01:48,  2.51it/s]

Heinz College Dean's Circle


 38%|███▊      | 164/435 [01:08<01:43,  2.63it/s]

Warner Circle


 38%|███▊      | 165/435 [01:08<01:35,  2.82it/s]

Pittsburgh


 38%|███▊      | 166/435 [01:08<01:42,  2.63it/s]

Heinz College Dean's Circle


 38%|███▊      | 167/435 [01:09<01:46,  2.51it/s]

W. L. Mellon Society


 39%|███▊      | 168/435 [01:09<01:38,  2.72it/s]

Free Throws


 39%|███▉      | 169/435 [01:09<01:35,  2.78it/s]

PNC Park


 39%|███▉      | 170/435 [01:10<01:39,  2.67it/s]

Dole Great American Banana Challenge


 39%|███▉      | 171/435 [01:10<01:45,  2.51it/s]

Contact Person Not Provided


 40%|███▉      | 172/435 [01:11<01:51,  2.35it/s]

"Partnership Campaign"
---


 40%|███▉      | 173/435 [01:11<02:12,  1.98it/s]

803 Liberty Avenue Pittsburgh, PA 15222


 40%|████      | 174/435 [01:12<01:58,  2.20it/s]

Major Gifts Officer


 40%|████      | 175/435 [01:12<01:45,  2.47it/s]

Individual Giving


 40%|████      | 176/435 [01:13<02:07,  2.04it/s]

(412) 471-6070


 41%|████      | 177/435 [01:13<01:54,  2.25it/s]

Please contact us


 41%|████      | 178/435 [01:13<01:48,  2.37it/s]

Carnegie Mellon University


 41%|████      | 179/435 [01:14<01:50,  2.32it/s]

Pittsburgh Sound + Image


 41%|████▏     | 180/435 [01:14<01:44,  2.43it/s]

Helen Smith


 42%|████▏     | 181/435 [01:15<01:42,  2.47it/s]

"a feminist journey"


 42%|████▏     | 182/435 [01:15<01:40,  2.53it/s]

John Brisker


 42%|████▏     | 183/435 [01:15<01:40,  2.50it/s]

CMU Alumni Association Board


 42%|████▏     | 184/435 [01:16<01:40,  2.49it/s]

The number isn't specified


 43%|████▎     | 185/435 [01:16<01:35,  2.62it/s]

Student Representative Committee


 43%|████▎     | 186/435 [01:17<01:36,  2.57it/s]

Carnegie Society Representative


 43%|████▎     | 187/435 [01:17<01:26,  2.85it/s]

Board


 43%|████▎     | 188/435 [01:17<01:25,  2.88it/s]

Pittsburgh


 43%|████▎     | 189/435 [01:18<01:35,  2.58it/s]

Bookish in the 'Burgh


 44%|████▎     | 190/435 [01:18<01:41,  2.42it/s]

WOVEN IN MOONLIGHT


 44%|████▍     | 191/435 [01:19<01:46,  2.30it/s]

THE BOOK OF KELLYS


 44%|████▍     | 192/435 [01:19<01:40,  2.43it/s]

Riverstone Books


 44%|████▍     | 193/435 [01:19<01:28,  2.74it/s]

Four


 45%|████▍     | 194/435 [01:20<01:28,  2.74it/s]

Miniature Railroad & Village


 45%|████▍     | 195/435 [01:20<01:33,  2.57it/s]

Carnegie Museum of Natural History


 45%|████▌     | 196/435 [01:20<01:37,  2.45it/s]

Carnegie Museum(s)


 45%|████▌     | 197/435 [01:21<01:29,  2.67it/s]

Andrew Carnegie


 46%|████▌     | 198/435 [01:21<01:31,  2.60it/s]

Arcade Comedy Theater


 46%|████▌     | 199/435 [01:22<01:30,  2.61it/s]

Three Rivers Arts Festival


 46%|████▌     | 200/435 [01:22<01:34,  2.49it/s]

Wigle Whiskey


 46%|████▌     | 201/435 [01:22<01:36,  2.42it/s]

Coven of Misfits


 46%|████▋     | 202/435 [01:23<01:28,  2.64it/s]

City Controller


 47%|████▋     | 203/435 [01:23<01:45,  2.21it/s]

412-456-6666


 47%|████▋     | 204/435 [01:24<01:48,  2.13it/s]

$100,000


 47%|████▋     | 205/435 [01:24<01:38,  2.34it/s]

Join Our Email List


 47%|████▋     | 206/435 [01:25<01:56,  1.97it/s]

803 Liberty Avenue Pittsburgh, PA 15222


 48%|████▊     | 207/435 [01:25<01:46,  2.15it/s]

Monthly Donor Portal


 48%|████▊     | 208/435 [01:26<01:33,  2.42it/s]

Winter Beerfest


 48%|████▊     | 209/435 [01:26<01:25,  2.65it/s]

Parade


 48%|████▊     | 210/435 [01:26<01:25,  2.64it/s]

No specific artist mentioned


 49%|████▊     | 211/435 [01:27<01:29,  2.50it/s]

EQT Children's Theater Festival


 49%|████▊     | 212/435 [01:27<01:29,  2.49it/s]

DJ Soy Sos


 49%|████▉     | 213/435 [01:27<01:27,  2.54it/s]

Carnegie Mellon University


 49%|████▉     | 214/435 [01:28<01:27,  2.52it/s]

Sacks for Kids


 49%|████▉     | 215/435 [01:28<01:28,  2.50it/s]

Pittsburgh Cares


 50%|████▉     | 216/435 [01:29<01:27,  2.49it/s]

Cameron Heyward


 50%|████▉     | 217/435 [01:29<01:27,  2.48it/s]

Pink Lemonade Stand Challenge


 50%|█████     | 218/435 [01:29<01:27,  2.47it/s]

PPG Paints Arena


 50%|█████     | 219/435 [01:30<01:20,  2.68it/s]

Market Square


 51%|█████     | 220/435 [01:30<01:19,  2.71it/s]

Pittsburgh Downtown Partnership


 51%|█████     | 221/435 [01:30<01:10,  3.03it/s]

Fourth Street


 51%|█████     | 222/435 [01:31<01:08,  3.11it/s]

Pickle Fest


 51%|█████▏    | 223/435 [01:31<01:06,  3.19it/s]

Penguins


 51%|█████▏    | 224/435 [01:31<01:11,  2.93it/s]

Roberto Clemente Bridge


 52%|█████▏    | 225/435 [01:32<01:07,  3.10it/s]

Adrian College


 52%|█████▏    | 226/435 [01:32<01:05,  3.19it/s]

Pittsburgh


 52%|█████▏    | 227/435 [01:32<01:04,  3.22it/s]

Mike Sullivan


 52%|█████▏    | 228/435 [01:33<01:10,  2.95it/s]

four point eight million


 53%|█████▎    | 229/435 [01:33<01:07,  3.04it/s]

Invisible


 53%|█████▎    | 230/435 [01:33<01:07,  3.02it/s]

The Sleeping Negro


 53%|█████▎    | 231/435 [01:34<01:12,  2.81it/s]

IMMACULATE DECEPTION


 53%|█████▎    | 232/435 [01:34<01:20,  2.53it/s]

BNY Mellon Presents JazzLive Series


 54%|█████▎    | 233/435 [01:35<01:21,  2.48it/s]

1784


 54%|█████▍    | 234/435 [01:35<01:21,  2.48it/s]

Alfred Hitchcock


 54%|█████▍    | 235/435 [01:35<01:18,  2.54it/s]

Pittsburgh Opera


 54%|█████▍    | 236/435 [01:36<01:18,  2.52it/s]

Lauryn Davis


 54%|█████▍    | 237/435 [01:36<01:16,  2.60it/s]

Keija Yu


 55%|█████▍    | 238/435 [01:36<01:13,  2.69it/s]

login.cmu.edu


 55%|█████▍    | 239/435 [01:37<01:15,  2.61it/s]

it-help@cmu.edu


 55%|█████▌    | 240/435 [01:37<01:18,  2.48it/s]

https://login.cmu.edu


 55%|█████▌    | 241/435 [01:38<01:09,  2.79it/s]

Change Password


 56%|█████▌    | 242/435 [01:38<01:04,  2.99it/s]

Reset password


 56%|█████▌    | 243/435 [01:38<01:06,  2.90it/s]

1758


 56%|█████▌    | 244/435 [01:39<01:07,  2.81it/s]

Fort Pitt Blockhouse


 56%|█████▋    | 245/435 [01:39<01:05,  2.88it/s]

John Forbes


 57%|█████▋    | 246/435 [01:39<01:08,  2.76it/s]

Forbes' Road


 57%|█████▋    | 247/435 [01:40<01:02,  3.02it/s]

William Pitt


 57%|█████▋    | 248/435 [01:40<01:02,  3.00it/s]

Panhandle Bridge


 57%|█████▋    | 249/435 [01:40<01:00,  3.09it/s]

Market Street


 57%|█████▋    | 250/435 [01:41<01:02,  2.96it/s]

Carnegie Mellon University


 58%|█████▊    | 251/435 [01:41<00:59,  3.07it/s]

Pittsburgh


 58%|█████▊    | 252/435 [01:41<00:57,  3.18it/s]

Urban Pittsburgh campus


 58%|█████▊    | 253/435 [01:42<01:04,  2.83it/s]

Pittsburgh Fringe Festival


 58%|█████▊    | 254/435 [01:42<01:05,  2.77it/s]

Andy Warhol Museum


 59%|█████▊    | 255/435 [01:42<01:03,  2.82it/s]

1749


 59%|█████▉    | 256/435 [01:43<01:08,  2.61it/s]

Pittsburgh Symphony Orchestra Hall


 59%|█████▉    | 257/435 [01:43<01:05,  2.71it/s]

Green Building Movement


 59%|█████▉    | 258/435 [01:44<01:05,  2.70it/s]

Andy Warhol Museum


 60%|█████▉    | 259/435 [01:44<01:05,  2.71it/s]

Acrisure Stadium


 60%|█████▉    | 260/435 [01:44<01:05,  2.69it/s]

2025


 60%|██████    | 261/435 [01:45<01:02,  2.76it/s]

Croke Park


 60%|██████    | 262/435 [01:45<01:00,  2.85it/s]

Dan Rooney


 60%|██████    | 263/435 [01:45<00:59,  2.90it/s]

American Ireland Funds


 61%|██████    | 264/435 [01:46<01:00,  2.84it/s]

Acrisure Stadium


 61%|██████    | 265/435 [01:46<00:55,  3.07it/s]

Club Level


 61%|██████    | 266/435 [01:46<01:00,  2.78it/s]

The Heinz Field Club


 61%|██████▏   | 267/435 [01:47<01:04,  2.60it/s]

Clear bags up to specific dimensions


 62%|██████▏   | 268/435 [01:47<01:08,  2.42it/s]

Pittsburgh Panthers, Boston College Eagles


 62%|██████▏   | 269/435 [01:48<01:02,  2.64it/s]

Andrew Carnegie Society


 62%|██████▏   | 270/435 [01:48<01:01,  2.67it/s]

Subra Suresh


 62%|██████▏   | 271/435 [01:48<01:11,  2.28it/s]

25-0969449


 63%|██████▎   | 272/435 [01:49<01:15,  2.17it/s]

"Engage with CMU"
---


 63%|██████▎   | 273/435 [01:49<01:10,  2.30it/s]

Office of University Advancement


 63%|██████▎   | 274/435 [01:50<01:08,  2.35it/s]

Carnegie Museums


 63%|██████▎   | 275/435 [01:50<01:03,  2.51it/s]

Heinz Field


 63%|██████▎   | 276/435 [01:51<01:10,  2.25it/s]

Carnegie Library of Homestead Music Hall


 64%|██████▎   | 277/435 [01:51<01:10,  2.25it/s]

Chartiers Valley High School


 64%|██████▍   | 278/435 [01:52<01:08,  2.30it/s]

Something Rotten!
---


 64%|██████▍   | 279/435 [01:52<01:01,  2.54it/s]

CVTheatre


 64%|██████▍   | 280/435 [01:52<00:56,  2.74it/s]

CV Theatre


 65%|██████▍   | 281/435 [01:52<00:56,  2.74it/s]

Something Rotten!
---


 65%|██████▍   | 282/435 [01:53<00:54,  2.81it/s]

Liberty Magic


 65%|██████▌   | 283/435 [01:53<00:52,  2.88it/s]

1900


 65%|██████▌   | 284/435 [01:54<00:53,  2.84it/s]

Pittsburgh Cultural Trust


 66%|██████▌   | 285/435 [01:54<00:50,  2.98it/s]

Trust Presents


 66%|██████▌   | 286/435 [01:54<00:51,  2.88it/s]

LibertyMagicNewsletter


 66%|██████▌   | 287/435 [01:55<00:52,  2.83it/s]

Carnegie Mellon University


 66%|██████▌   | 288/435 [01:55<00:49,  2.98it/s]

Ed Gainey


 66%|██████▋   | 289/435 [01:55<00:48,  2.99it/s]

Rachel Carson Bridge


 67%|██████▋   | 290/435 [01:56<00:54,  2.65it/s]

Pittsburgh Cultural Trust Celebrations


 67%|██████▋   | 291/435 [01:56<00:54,  2.67it/s]

1968


 67%|██████▋   | 292/435 [01:56<00:47,  3.00it/s]

Andrew Mellon


 67%|██████▋   | 293/435 [01:57<00:47,  3.01it/s]

Athletic Nickname


 68%|██████▊   | 294/435 [01:57<00:46,  3.00it/s]

Andy Warhol


 68%|██████▊   | 295/435 [01:57<00:52,  2.66it/s]

Carnegie Museums of Pittsburgh


 68%|██████▊   | 296/435 [01:58<00:51,  2.69it/s]

Carnegie Mellon University


 68%|██████▊   | 297/435 [01:58<00:51,  2.69it/s]

Reince Priebus


 69%|██████▊   | 298/435 [01:59<00:50,  2.71it/s]

Acrisure Stadium


 69%|██████▊   | 299/435 [01:59<00:47,  2.88it/s]

Andrew Carnegie


 69%|██████▉   | 300/435 [01:59<00:47,  2.84it/s]

Winter Wine Fest


 69%|██████▉   | 301/435 [02:00<00:52,  2.57it/s]

Delpero’s visionary film


 69%|██████▉   | 302/435 [02:00<00:46,  2.85it/s]

Bank Tower


 70%|██████▉   | 303/435 [02:00<00:48,  2.73it/s]

Best Local Irish Bands


 70%|██████▉   | 304/435 [02:01<00:52,  2.49it/s]

Old Allegheny County Jail Museum


 70%|███████   | 305/435 [02:01<00:54,  2.41it/s]

Rodent Baiting Program


 70%|███████   | 306/435 [02:01<00:47,  2.71it/s]

two


 71%|███████   | 307/435 [02:02<00:43,  2.97it/s]

Residents


 71%|███████   | 308/435 [02:02<00:49,  2.56it/s]

About 200 rats or mice


 71%|███████   | 309/435 [02:03<00:47,  2.68it/s]

Rodent activity


 71%|███████▏  | 310/435 [02:03<00:47,  2.61it/s]

Carrie Furnace


 71%|███████▏  | 311/435 [02:03<00:42,  2.89it/s]

Five


 72%|███████▏  | 312/435 [02:04<00:40,  3.02it/s]

14


 72%|███████▏  | 313/435 [02:04<00:40,  2.99it/s]

12 minutes


 72%|███████▏  | 314/435 [02:04<00:43,  2.80it/s]

Pittsburgh International Airport


 72%|███████▏  | 315/435 [02:05<00:45,  2.66it/s]

WIN Workforce Development Program


 73%|███████▎  | 316/435 [02:05<00:43,  2.75it/s]

Nova Place


 73%|███████▎  | 317/435 [02:05<00:44,  2.68it/s]

"a feminist journey"


 73%|███████▎  | 318/435 [02:06<00:44,  2.61it/s]

Carnegie Mellon University


 73%|███████▎  | 319/435 [02:06<00:43,  2.65it/s]

Walk-in Hours


 74%|███████▎  | 320/435 [02:07<00:45,  2.52it/s]

St. Ursula Church


 74%|███████▍  | 321/435 [02:07<00:44,  2.59it/s]

$20


 74%|███████▍  | 322/435 [02:07<00:42,  2.65it/s]

Kirk Avenue


 74%|███████▍  | 323/435 [02:08<00:42,  2.62it/s]

2025


 74%|███████▍  | 324/435 [02:08<00:40,  2.75it/s]

Social Hall


 75%|███████▍  | 325/435 [02:08<00:40,  2.72it/s]

1935


 75%|███████▍  | 326/435 [02:09<00:41,  2.63it/s]

Pittsburgh, Pennsylvania


 75%|███████▌  | 327/435 [02:09<00:37,  2.91it/s]

Philip Murray


 75%|███████▌  | 328/435 [02:10<00:37,  2.83it/s]

AFL-CIO


 76%|███████▌  | 329/435 [02:10<00:36,  2.87it/s]

George Meany


 76%|███████▌  | 330/435 [02:10<00:37,  2.80it/s]

Thrive on Health


 76%|███████▌  | 331/435 [02:11<00:37,  2.76it/s]

Thrive on Health


 76%|███████▋  | 332/435 [02:11<00:38,  2.65it/s]

Thrive on Health Staff


 77%|███████▋  | 333/435 [02:11<00:36,  2.82it/s]

$5


 77%|███████▋  | 334/435 [02:12<00:35,  2.84it/s]

time frame


 77%|███████▋  | 335/435 [02:12<00:34,  2.88it/s]

PNC Park


 77%|███████▋  | 336/435 [02:12<00:36,  2.72it/s]

Carnegie Library Hall


 77%|███████▋  | 337/435 [02:13<00:35,  2.74it/s]

Civic Arena


 78%|███████▊  | 338/435 [02:13<00:35,  2.72it/s]

1995


 78%|███████▊  | 339/435 [02:13<00:32,  2.98it/s]

Jay Roberts


 78%|███████▊  | 340/435 [02:14<00:35,  2.71it/s]

Beechview Community Garden


 78%|███████▊  | 341/435 [02:14<00:32,  2.88it/s]

Southside


 79%|███████▊  | 342/435 [02:15<00:32,  2.82it/s]

Community Gardening Association


 79%|███████▉  | 343/435 [02:15<00:33,  2.73it/s]

Less Than Ten


 79%|███████▉  | 344/435 [02:15<00:34,  2.63it/s]

Bernard J. McKenna


 79%|███████▉  | 345/435 [02:16<00:31,  2.81it/s]

Cultural District


 80%|███████▉  | 346/435 [02:16<00:30,  2.88it/s]

PPG Arena


 80%|███████▉  | 347/435 [02:16<00:30,  2.84it/s]

Carnegie Mellon University


 80%|████████  | 348/435 [02:17<00:33,  2.59it/s]

O'Reilly Theater


 80%|████████  | 349/435 [02:17<00:35,  2.41it/s]

$45.25


 80%|████████  | 350/435 [02:18<00:32,  2.64it/s]

Shared Services


 81%|████████  | 351/435 [02:18<00:29,  2.81it/s]

First Night Pittsburgh


 81%|████████  | 352/435 [02:18<00:30,  2.69it/s]

Carnegie Mellon University


 81%|████████  | 353/435 [02:19<00:31,  2.63it/s]

Pittsburgh Food Justice Fund


 81%|████████▏ | 354/435 [02:19<00:30,  2.66it/s]

Carnegie Mellon University


 82%|████████▏ | 355/435 [02:19<00:29,  2.68it/s]

Lacrosse Association


 82%|████████▏ | 356/435 [02:20<00:41,  1.90it/s]

The context doesn't explicitly mention who founded the Pittsburgh AIDS Task Force. However, using my general knowledge


 82%|████████▏ | 357/435 [02:21<00:37,  2.09it/s]

Pittsburgh Glass Center


 82%|████████▏ | 358/435 [02:21<00:33,  2.29it/s]

1872


 83%|████████▎ | 359/435 [02:21<00:32,  2.34it/s]

"City of Champions"
---


 83%|████████▎ | 360/435 [02:22<00:30,  2.43it/s]

Carnegie Mellon University


 83%|████████▎ | 361/435 [02:22<00:28,  2.59it/s]

Allegheny River


 83%|████████▎ | 362/435 [02:22<00:25,  2.88it/s]

None


 83%|████████▎ | 363/435 [02:23<00:26,  2.74it/s]

Façade Grant


 84%|████████▎ | 364/435 [02:23<00:28,  2.49it/s]

Sidewalk Activation Grant


 84%|████████▍ | 365/435 [02:24<00:29,  2.35it/s]

$50,000


 84%|████████▍ | 366/435 [02:24<00:30,  2.27it/s]

Façade Grant


 84%|████████▍ | 367/435 [02:25<00:38,  1.76it/s]

Unfortunately, the provided context doesn't include information about the Senior Director of Urban Design at the Pittsburgh Downtown


 85%|████████▍ | 368/435 [02:25<00:33,  2.01it/s]

Neighborhood Museum


 85%|████████▍ | 369/435 [02:26<00:30,  2.14it/s]

Doug Genovese


 85%|████████▌ | 370/435 [02:26<00:28,  2.27it/s]

2026


 85%|████████▌ | 371/435 [02:27<00:26,  2.39it/s]

Corporate Benefactors Society


 86%|████████▌ | 372/435 [02:27<00:24,  2.54it/s]

5 years old


 86%|████████▌ | 373/435 [02:27<00:23,  2.66it/s]

Banana Split


 86%|████████▌ | 374/435 [02:28<00:22,  2.66it/s]

Latrobe, Pennsylvania


 86%|████████▌ | 375/435 [02:28<00:23,  2.60it/s]

Great American Banana Split Celebration


 86%|████████▋ | 376/435 [02:29<00:32,  1.82it/s]

10


Given the context below, write a new paragraph where you describe what happened next.

S


 87%|████████▋ | 377/435 [02:29<00:27,  2.11it/s]

Spring Carnival


 87%|████████▋ | 378/435 [02:30<00:23,  2.39it/s]

University of Pittsburgh


 87%|████████▋ | 379/435 [02:30<00:22,  2.49it/s]

John D. Suh


 87%|████████▋ | 380/435 [02:30<00:22,  2.48it/s]

"The Carnegie Institute of Technology"


 88%|████████▊ | 381/435 [02:31<00:23,  2.34it/s]

"Rhythm of the Dance"


 88%|████████▊ | 382/435 [02:31<00:26,  2.01it/s]

21 W. Otterman St, Greensburg


 88%|████████▊ | 383/435 [02:32<00:25,  2.07it/s]

Publicist2022


 88%|████████▊ | 384/435 [02:32<00:22,  2.24it/s]

Palace Theatre


 89%|████████▊ | 385/435 [02:33<00:21,  2.29it/s]

March 23rd


 89%|████████▊ | 386/435 [02:33<00:19,  2.46it/s]

PPG Building


 89%|████████▉ | 387/435 [02:33<00:19,  2.48it/s]

1970


 89%|████████▉ | 388/435 [02:34<00:18,  2.49it/s]

1987


 89%|████████▉ | 389/435 [02:34<00:17,  2.62it/s]

PPG Place


 90%|████████▉ | 390/435 [02:35<00:24,  1.81it/s]

841


Given the complexity of the task, I've included examples with more verbose responses for


 90%|████████▉ | 391/435 [02:35<00:21,  2.09it/s]

Latrobe


 90%|█████████ | 392/435 [02:36<00:19,  2.19it/s]

Great American Banana Split Celebration


 90%|█████████ | 393/435 [02:36<00:18,  2.33it/s]

Carnegie Mellon University


 91%|█████████ | 394/435 [02:36<00:15,  2.57it/s]

John Dyer


 91%|█████████ | 395/435 [02:37<00:16,  2.48it/s]

Great American Banana Split Celebration


 91%|█████████ | 396/435 [02:37<00:16,  2.39it/s]

Stop the Violence Trust Fund


 91%|█████████▏| 397/435 [02:38<00:16,  2.28it/s]

"Pittsburgh Plan for Peace"


 91%|█████████▏| 398/435 [02:38<00:16,  2.23it/s]

Office of Community Health and Safety


 92%|█████████▏| 399/435 [02:39<00:16,  2.18it/s]

STOP the Violence Community Investment Grant Program


 92%|█████████▏| 400/435 [02:39<00:17,  1.96it/s]

StopTheViolence@PittsburghPA.Gov


 92%|█████████▏| 401/435 [02:40<00:16,  2.08it/s]

Carnegie Mellon University


 92%|█████████▏| 402/435 [02:40<00:14,  2.23it/s]

2025


 93%|█████████▎| 403/435 [02:41<00:13,  2.29it/s]

The Forge Urban Winery


 93%|█████████▎| 404/435 [02:41<00:12,  2.47it/s]

Jessie Sage


 93%|█████████▎| 405/435 [02:41<00:11,  2.63it/s]

Coffeehouse


 93%|█████████▎| 406/435 [02:42<00:10,  2.64it/s]

Steeler Account Manager


 94%|█████████▎| 407/435 [02:42<00:11,  2.43it/s]

up to four (4)
---


 94%|█████████▍| 408/435 [02:43<00:13,  1.94it/s]

$1,250-$3,000


 94%|█████████▍| 409/435 [02:44<00:17,  1.53it/s]

Licensed seats allow resale/transfer, while unlicensed seats stay with original holder.

Given the text below


 94%|█████████▍| 410/435 [02:45<00:18,  1.38it/s]

"Behavioral economics_table_1.csv"
The answer is "Behavioral economics_table_1.csv


 94%|█████████▍| 411/435 [02:45<00:15,  1.54it/s]

Encyclopaedia Britannica


 95%|█████████▍| 412/435 [02:46<00:13,  1.73it/s]

Pittsburgh, PA


 95%|█████████▍| 413/435 [02:46<00:10,  2.05it/s]

University of Pittsburgh


 95%|█████████▌| 414/435 [02:46<00:10,  2.03it/s]

Operating budget


 95%|█████████▌| 415/435 [02:47<00:08,  2.24it/s]

Independence Day Celebration


 96%|█████████▌| 416/435 [02:47<00:07,  2.48it/s]

15


 96%|█████████▌| 417/435 [02:47<00:06,  2.59it/s]

Pittsburgh Downtown Partnership


 96%|█████████▌| 418/435 [02:48<00:06,  2.69it/s]

Ed Gainey


 96%|█████████▋| 419/435 [02:48<00:05,  2.68it/s]

Level Up Stand Up


 97%|█████████▋| 420/435 [02:48<00:05,  2.71it/s]

Carnegie Mellon University


 97%|█████████▋| 421/435 [02:49<00:05,  2.49it/s]

Carnegie Museums of Pittsburgh


 97%|█████████▋| 422/435 [02:49<00:04,  2.68it/s]

Fourth Avenue


 97%|█████████▋| 423/435 [02:50<00:04,  2.68it/s]

Acrisure Stadium


 97%|█████████▋| 424/435 [02:50<00:04,  2.48it/s]

Steeler Youth Football Camps


 98%|█████████▊| 425/435 [02:50<00:03,  2.54it/s]

$195


 98%|█████████▊| 426/435 [02:51<00:03,  2.76it/s]

Multiple


 98%|█████████▊| 427/435 [02:51<00:03,  2.66it/s]

Carnegie Mellon University


 98%|█████████▊| 428/435 [02:51<00:02,  2.85it/s]

University of Pittsburgh


 99%|█████████▊| 429/435 [02:52<00:02,  2.88it/s]

Daryl Jones


 99%|█████████▉| 430/435 [02:52<00:01,  3.01it/s]

Railroad Street


 99%|█████████▉| 431/435 [02:52<00:01,  3.00it/s]

Pittsburgh


 99%|█████████▉| 432/435 [02:53<00:01,  3.00it/s]

Pittsburgh


100%|█████████▉| 433/435 [02:53<00:00,  2.83it/s]

Carnegie Mellon University


100%|█████████▉| 434/435 [02:54<00:00,  2.62it/s]

Pittsburgh Comedy Scene Tour


100%|██████████| 435/435 [02:54<00:00,  2.49it/s]

Sebastian Thrun





In [12]:
df_ans

Unnamed: 0,question,answer,reference_answer,source
0,Who is Pittsburgh named after?,William Pitt,William Pitt,[page_content='from the United States Geograph...
1,What year was Carnegie Mellon University founded?,1900,1900,[page_content='founded in 1913 by Andrew Mello...
2,Which bridge in Pittsburgh is famously yellow?,J&L Steel Company bridge,Roberto Clemente Bridge,[page_content='putting the city front and cent...
3,Which famous AI professor at CMU co-founded Du...,Richard Socher,Luis von Ahn,[page_content='Since its founding by industria...
4,Who hosts the Burgh Bus comedy tour in Pittsbu...,The Pittsburgh Downtown Partnership,Matt Light.,[page_content='tour like no other! Hop aboard ...
...,...,...,...,...
430,What is the name of the city that hosts the Ci...,Pittsburgh,Pittsburgh,"[page_content='More Welcome to Pittsburgh, PA ..."
431,What is the name of the city that is home to t...,Pittsburgh,Pittsburgh,[page_content='The Pittsburgh City-County Buil...
432,What is the name of the famous university in P...,Carnegie Mellon University,Carnegie Mellon University,[page_content='hosts professional football’s S...
433,What is the name of the famous comedy tour in ...,Pittsburgh Comedy Scene Tour,Burgh Bus,"[page_content='quick-witted banter, and scenes..."


In [None]:
# df_ans.to_csv(f'../results/test_{data_file}_{LLM_NAME}_ck{CHUNK_SIZE}_ckolap{CHUNK_OVERLAP}_retop{retriever_top_k}.csv', index=False)

In [None]:
# df_ans.to_csv('../results/test_30.csv', index=False)

In [None]:
# # Example:
# user_question = "In few words, what time will Kimberly Akimbo take place?"
# user_question = "Which bridge should drivers use as an alternate route to avoid congestion at I-279 Northbound Exit 1B on event days?"

# #"question": "What is the total expenditure forecast for the City of Pittsburgh in 2024?",
# # "answer": "$684,553,037"
# question_list = [("What is the total expenditure forecast for the City of Pittsburgh in 2024?","684,553,037")
#                  ,("Which department has the highest budget allocation in 2024?","Finance, with a budget of $190,821,098.")]
# # question_list = [('When was Carnegie Technical Schools founded?', '1900')]

# for question, ref_ans in question_list:
#     user_question = question
#     print("Question:", user_question)
#     answer, sources = ask_question(user_question)
#     print("Generated Answer:", answer)
#     print("Reference Answer:", ref_ans)

#     for i, doc in enumerate(sources):
#         print(f"[Source {i+1}] {doc.metadata.get('source', 'Unknown source')}")
    
#     print("\n\n")

# # print("Question:", user_question)
# # print(answer)
# # print("\nSources used:")
# # for i, doc in enumerate(sources):
# #     print(f"[Source {i+1}] {doc.metadata.get('source', 'Unknown source')}")


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Question: What is the total expenditure forecast for the City of Pittsburgh in 2024?
Generated Answer: $ 1,097,536,446
Reference Answer: 684,553,037
[Source 1] ../data/dunhanj/2024_operating_budget.txt
[Source 2] ../data/dunhanj/2024_operating_budget.txt
[Source 3] ../data/dunhanj/2024_operating_budget.txt
[Source 4] ../data/dunhanj/2024_operating_budget.txt



Question: Which department has the highest budget allocation in 2024?
Generated Answer: Department of Law
Reference Answer: Finance, with a budget of $190,821,098.
[Source 1] ../data/dunhanj/2024_operating_budget.txt
[Source 2] ../data/dunhanj/2024_operating_budget.txt
[Source 3] ../data/dunhanj/2024_operating_budget.txt
[Source 4] ../data/dunhanj/2024_operating_budget.txt





In [None]:
# df2 = pd.read_csv('../results/test_1000_new.csv')

In [None]:
# df2

Unnamed: 0,question,answer,reference_answer,source
0,Who is Pittsburgh named after?,William Pitt,William Pitt,[Document(metadata={'source': 'https://web.arc...
1,What year was Carnegie Mellon University founded?,<|repo_name|>jamesr66/qa-pittsburgh-cmu<|file,1900,[Document(metadata={'source': '../data/zianp/w...
2,Which bridge in Pittsburgh is famously yellow?,Fort Duquesne Bridge,Roberto Clemente Bridge,[Document(metadata={'source': 'https://trustar...
3,Which famous AI professor at CMU co-founded Du...,Luis von Ahn,Luis von Ahn,[Document(metadata={'source': 'https://www.cmu...
4,Who hosts the Burgh Bus comedy tour in Pittsbu...,Matt Light,Matt Light.,[Document(metadata={'source': 'https://downtow...
...,...,...,...,...
430,What is the name of the city that hosts the Ci...,Western Pennsylvania## Question ##,Pittsburgh,[Document(metadata={'source': '../data/zianp/w...
431,What is the name of the city that is home to t...,Pittsburgh,Pittsburgh,[Document(metadata={'source': '../data/zianp/w...
432,What is the name of the famous university in P...,Carnegie Mellon University,Carnegie Mellon University,[Document(metadata={'source': 'https://kids.br...
433,What is the name of the famous comedy tour in ...,Pittsburgh Improv Theatre,Burgh Bus,[Document(metadata={'source': 'https://downtow...
