In [10]:
# # Natural Language Processing
# !pip install langchain==0.0.191
# !pip install llama-cpp-python==0.1.66
# !pip install sentence-transformers
# !pip install huggingface_hub
# !pip install auto-gptq==0.2.2
# !pip install termcolor

In [90]:
GREEN = '\033[92m'
END_COLOR = '\033[0m'

try:
    imported_modules = [
        ("logging", logging),
        ("click", click),
        ("torch", torch),
        ("transformers", transformers),
        ("os", os),
        ("re", re),
        ("shutil", shutil),
        ("subprocess", subprocess),
        ("requests", requests),
        ("pathlib", Path),
        ("auto_gptq", AutoGPTQForCausalLM),
        ("huggingface_hub", hf_hub_download),
        ("huggingface_instruct_embeddings", HuggingFaceInstructEmbeddings),
        ("langchain_pipeline", HuggingFacePipeline),
        ("llama_cpp", LlamaCpp),
        ("prompt_template", PromptTemplate),
        ("llm_chain", LLMChain),
        ("transformers_auto_tokenizer", AutoTokenizer),
        ("transformers_auto_model", AutoModelForCausalLM),
        ("transformers_generation_config", GenerationConfig),
        ("transformers_llm_model", LlamaForCausalLM),
        ("transformers_llm_tokenizer", LlamaTokenizer),
        ("transformers_longformer_tokenizer", LongformerTokenizer), 
        ("transformers_pipeline", pipeline),
        ("rouge", Rouge), 
        ("text_splitter", RecursiveCharacterTextSplitter),
        ("tqdm", tqdm), 
        ("termcolor_colored", colored),
    ]

    print("Module(s) Imported:")
    for module_name, module in imported_modules:
        if module:
            print(f" - {module_name}")

            version = getattr(module, "__version__", None)
            if version:
                print(f"   Version: {GREEN}{version}{END_COLOR}")
except ImportError as e:
    print(f"Failed to import a module: {e}")


Module(s) Imported:
 - logging
   Version: [92m0.5.1.2[0m
 - click
   Version: [92m8.1.7[0m
 - torch
   Version: [92m2.0.1[0m
 - transformers
   Version: [92m4.33.0[0m
 - os
 - re
   Version: [92m2.2.1[0m
 - shutil
 - subprocess
 - requests
   Version: [92m2.31.0[0m
 - pathlib
 - auto_gptq
 - huggingface_hub
 - huggingface_instruct_embeddings
 - langchain_pipeline
 - llama_cpp
 - prompt_template
 - llm_chain
 - transformers_auto_tokenizer
 - transformers_auto_model
 - transformers_generation_config
 - transformers_llm_model
 - transformers_llm_tokenizer
 - transformers_longformer_tokenizer
 - transformers_pipeline
 - rouge
 - text_splitter
 - tqdm
 - termcolor_colored


In [11]:
def load_model(device_type, model_id, model_basename=None):

    logging.info(f"Loading Model: {model_id}, on: {device_type}")
    logging.info("This action can take a few minutes!")

    if model_basename is not None:
        if ".ggml" in model_basename:
            logging.info("Using Llamacpp for GGML quantized models")
            model_path = hf_hub_download(repo_id=model_id, filename=model_basename)
            max_ctx_size = 4096
            kwargs = {
                "model_path": model_path,
                "n_ctx": max_ctx_size,
                "max_tokens": max_ctx_size,
            }
            if device_type.lower() == "mps":
                kwargs["n_gpu_layers"] = 1000
            if device_type.lower() == "cuda":
                kwargs["n_gpu_layers"] = 1000
                kwargs["n_batch"] = max_ctx_size
            return LlamaCpp(**kwargs)

        else:
            logging.info("Using AutoGPTQForCausalLM for quantized models")

            if ".safetensors" in model_basename:
                # Remove the ".safetensors" ending if present
                model_basename = model_basename.replace(".safetensors", "")

            tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
            logging.info("Tokenizer loaded")

            model = AutoGPTQForCausalLM.from_quantized(
                model_id,
                model_basename=model_basename,
                use_safetensors=True,
                trust_remote_code=True,
                device="cuda:0",
                use_triton=False,
                quantize_config=None,
            )
    elif (
        device_type.lower() == "cuda"
    ):  
        logging.info("Using AutoModelForCausalLM for full models")
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        logging.info("Tokenizer loaded")

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            # max_memory={0: "15GB"} # Uncomment this line with you encounter CUDA out of memory errors
        )
        model.tie_weights()
    else:
        logging.info("Using LlamaTokenizer")
        tokenizer = LlamaTokenizer.from_pretrained(model_id)
        model = LlamaForCausalLM.from_pretrained(model_id)

    generation_config = GenerationConfig.from_pretrained(model_id)

    # Create a pipeline for text generation
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=2048,
        temperature=0,
        top_p=0.95,
        repetition_penalty=1.15,
        generation_config=generation_config,
    )

    local_llm = HuggingFacePipeline(pipeline=pipe)
    logging.info("Local LLM Loaded")

    return local_llm

In [12]:
DEVICE_TYPE = "cuda" if torch.cuda.is_available() else "cpu"
SHOW_SOURCES = True
logging.info(f"Running on: {DEVICE_TYPE}")
logging.info(f"Display Source Documents set to: {SHOW_SOURCES}")

In [13]:
model_id = "TheBloke/Llama-2-7B-Chat-GGML"
model_basename = "llama-2-7b-chat.ggmlv3.q4_0.bin"

In [14]:
LLM = load_model(device_type=DEVICE_TYPE, model_id=model_id, model_basename=model_basename)

llama.cpp: loading model from /Users/tushitdave/.cache/huggingface/hub/models--TheBloke--Llama-2-7B-Chat-GGML/snapshots/00109c56c85ca9015795ca06c272cbc65c7f1dbf/llama-2-7b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 5407.71 MB (+ 1026.00 MB per state)
llama_new_context_with_model: kv self size  = 2048.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI

In [39]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")

def fetch_and_save_wiki_text(title):
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
    ).json()
    
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]
    
    return wiki_text

def clean_text(text):
    # Remove special characters except "."
    text = re.sub(r'[^A-Za-z0-9\s.\(\)\[\]\{\}]+', '', text)
    # Convert to lowercase
    text = text.lower()
    # Remove extra whitespace
    text = ' '.join(text.split())
    return text

def count_tokens(text):
    tokens = tokenizer.encode(text, add_special_tokens=True)
    return len(tokens)

In [40]:
wonders_cities = [
    'Beirut',
    'Doha',
    'Durban',
    'Havana',
    'Kuala Lumpur',
    'La Paz',
    'Vigan',
]

data = []
for wonder_city in wonders_cities:
    info = fetch_and_save_wiki_text(wonder_city)
    tokens = tokenizer.encode(info, add_special_tokens=True, truncation=True, max_length=30000)
    num_tokens = len(tokens)
    data.append([wonder_city, info, num_tokens])

df = pd.DataFrame(data, columns=["wonder_city", "information", "num_tokens"])


# df.to_csv("wonder_city_information.csv", index=False)

In [41]:
df.head()

Unnamed: 0,wonder_city,information,num_tokens
0,Beirut,Beirut is the capital and largest city of Le...,12695
1,Doha,"Doha (Arabic: الدوحة, romanized: ad-Dawḥa [adˈ...",9016
2,Durban,"Durban ( DUR-bən) (Zulu: eThekwini, from ithek...",7992
3,Havana,Havana (; Spanish: La Habana [la aˈβana] ; Luc...,30000
4,Kuala Lumpur,Kuala Lumpur (Malaysian pronunciation: [ˈkualə...,12584


In [42]:
df["cleaned_information"] = df["information"].apply(clean_text)
df["token_count"] = df["cleaned_information"].apply(count_tokens)
df.head()

Token indices sequence length is longer than the specified maximum sequence length for this model (12121 > 4096). Running this sequence through the model will result in indexing errors


Unnamed: 0,wonder_city,information,num_tokens,cleaned_information,token_count
0,Beirut,Beirut is the capital and largest city of Le...,12695,beirut is the capital and largest city of leba...,12121
1,Doha,"Doha (Arabic: الدوحة, romanized: ad-Dawḥa [adˈ...",9016,doha (arabic romanized addawa [addua] or adda)...,8252
2,Durban,"Durban ( DUR-bən) (Zulu: eThekwini, from ithek...",7992,durban ( durbn) (zulu ethekwini from itheku me...,7395
3,Havana,Havana (; Spanish: La Habana [la aˈβana] ; Luc...,30000,havana ( spanish la habana [la aana] lucumi il...,28979
4,Kuala Lumpur,Kuala Lumpur (Malaysian pronunciation: [ˈkualə...,12584,kuala lumpur (malaysian pronunciation [kual a ...,12397


In [43]:
df['cleaned_information'][1]

'doha (arabic romanized addawa [addua] or adda) is the capital city and main financial hub of qatar. located on the persian gulf coast in the east of the country north of al wakrah and south of al khur it is home to most of the countrys population. it is also qatars fastest growing city with over 80 of the nations population living in doha or its surrounding suburbs.doha was founded in the 1820s as an offshoot of al bidda. it was officially declared as the countrys capital in 1971 when qatar gained independence from being a british protectorate. as the commercial capital of qatar and one of the emergent financial centers in the middle east doha is considered a betalevel global city by the globalization and world cities research network. doha accommodates education city an area devoted to research and education and hamad medical city an administrative area of medical care. it also includes doha sports city or aspire zone an international sports destination that includes khalifa internat

In [59]:
def generate_summary(text_chunk):
    # Defining the template to generate summary
    template = """
    Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text.
    ```{text}```
    SUMMARY:
    """
    prompt = PromptTemplate(template=template, input_variables=["text"])
    llm_chain = LLMChain(prompt=prompt, llm=LLM)

    summary = llm_chain.run(text_chunk)
    return summary

In [60]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4096, chunk_overlap=50, length_function=len)

df["summary"] = ""

for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Summaries"):
    wonder_city = row["wonder_city"]
    text_chunk = row["cleaned_information"]
    chunks = text_splitter.split_text(text_chunk)
    chunk_summaries = []

    for chunk in chunks:
        summary = generate_summary(chunk)
        chunk_summaries.append(summary)
    

    combined_summary = "\n".join(chunk_summaries)
    df.at[index, "summary"] = combined_summary

Generating Summaries:   0%|                                                                                      | 0/7 [00:00<?, ?it/s]Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   187.38 ms /   174 runs   (    1.08 ms per token,   928.61 tokens per second)
llama_print_timings: prompt eval time = 76841.65 ms /  1082 tokens (   71.02 ms per token,    14.08 tokens per second)
llama_print_timings:        eval time = 18906.01 ms /   173 runs   (  109.28 ms per token,     9.15 tokens per second)
llama_print_timings:       total time = 96469.14 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   226.86 ms /   240 runs   (    0.95 ms per token,  1057.94 tokens per second)
llama_print_timings: prompt eval time = 153415.90 ms /  1035 tokens (  148.23 ms per token,     6.75 tokens per second)
llama_print_timings:        eval time = 23906


llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   366.07 ms /   319 runs   (    1.15 ms per token,   871.42 tokens per second)
llama_print_timings: prompt eval time = 85324.90 ms /  1067 tokens (   79.97 ms per token,    12.51 tokens per second)
llama_print_timings:        eval time = 36595.52 ms /   318 runs   (  115.08 ms per token,     8.69 tokens per second)
llama_print_timings:       total time = 123244.24 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   209.76 ms /   175 runs   (    1.20 ms per token,   834.28 tokens per second)
llama_print_timings: prompt eval time = 96120.74 ms /  1069 tokens (   89.92 ms per token,    11.12 tokens per second)
llama_print_timings:        eval time = 20635.90 ms /   174 runs   (  118.60 ms per token,     8.43 tokens per second)
llama_print_timings:       total time = 117640.45 ms
Llama.generate: prefix-match hit

llama_p

Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   572.20 ms /   309 runs   (    1.85 ms per token,   540.02 tokens per second)
llama_print_timings: prompt eval time = 128917.60 ms /  1039 tokens (  124.08 ms per token,     8.06 tokens per second)
llama_print_timings:        eval time = 54702.03 ms /   308 runs   (  177.60 ms per token,     5.63 tokens per second)
llama_print_timings:       total time = 185621.72 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   456.44 ms /   231 runs   (    1.98 ms per token,   506.09 tokens per second)
llama_print_timings: prompt eval time = 129284.99 ms /   999 tokens (  129.41 ms per token,     7.73 tokens per second)
llama_print_timings:        eval time = 45101.09 ms /   230 runs   (  196.09 ms per token,     5.10 tokens per second)
llama_print_timings:       total time = 176047.28 ms
Llama.


llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   408.63 ms /   218 runs   (    1.87 ms per token,   533.48 tokens per second)
llama_print_timings: prompt eval time = 113658.69 ms /   931 tokens (  122.08 ms per token,     8.19 tokens per second)
llama_print_timings:        eval time = 38435.19 ms /   217 runs   (  177.12 ms per token,     5.65 tokens per second)
llama_print_timings:       total time = 153596.54 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   214.02 ms /   213 runs   (    1.00 ms per token,   995.25 tokens per second)
llama_print_timings: prompt eval time = 227211.34 ms /   973 tokens (  233.52 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time = 21951.10 ms /   212 runs   (  103.54 ms per token,     9.66 tokens per second)
llama_print_timings:       total time = 250016.31 ms
Llama.generate: prefix-match hit

llama


llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   182.10 ms /   158 runs   (    1.15 ms per token,   867.67 tokens per second)
llama_print_timings: prompt eval time = 86651.82 ms /  1116 tokens (   77.65 ms per token,    12.88 tokens per second)
llama_print_timings:        eval time = 18215.21 ms /   157 runs   (  116.02 ms per token,     8.62 tokens per second)
llama_print_timings:       total time = 105642.98 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   311.96 ms /   243 runs   (    1.28 ms per token,   778.95 tokens per second)
llama_print_timings: prompt eval time = 97891.78 ms /  1064 tokens (   92.00 ms per token,    10.87 tokens per second)
llama_print_timings:        eval time = 30348.24 ms /   242 runs   (  125.41 ms per token,     7.97 tokens per second)
llama_print_timings:       total time = 129425.95 ms
Llama.generate: prefix-match hit

llama_p


llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   338.09 ms /   195 runs   (    1.73 ms per token,   576.77 tokens per second)
llama_print_timings: prompt eval time = 103630.27 ms /  1002 tokens (  103.42 ms per token,     9.67 tokens per second)
llama_print_timings:        eval time = 31171.05 ms /   194 runs   (  160.68 ms per token,     6.22 tokens per second)
llama_print_timings:       total time = 136071.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  6426.74 ms
llama_print_timings:      sample time =   238.46 ms /   259 runs   (    0.92 ms per token,  1086.12 tokens per second)
llama_print_timings: prompt eval time = 553684.64 ms /   920 tokens (  601.83 ms per token,     1.66 tokens per second)
llama_print_timings:        eval time = 25023.11 ms /   258 runs   (   96.99 ms per token,    10.31 tokens per second)
llama_print_timings:       total time = 579649.15 ms
Llama.generate: prefix-match hit

llama

In [63]:
df[["wonder_city", "summary"]]

Unnamed: 0,wonder_city,summary
0,Beirut,1. Beirut is the capital and largest city of L...
1,Doha,1. Doha is the capital city and financial hub ...
2,Durban,"1. Durban is located in KwaZulu-Natal, South A..."
3,Havana,1) Havana is the capital and largest city of C...
4,Kuala Lumpur,1. Kuala Lumpur is the capital city of Malaysi...
5,La Paz,"1. La Paz is a capital city in Bolivia, locate..."
6,Vigan,1. Vigan is a 4th class component city and cap...


In [65]:
df['summary_token_count'] = df["summary"].apply(count_tokens)

Token indices sequence length is longer than the specified maximum sequence length for this model (6909 > 4096). Running this sequence through the model will result in indexing errors


In [66]:
df.head()

Unnamed: 0,wonder_city,information,num_tokens,cleaned_information,token_count,summary,summary_token_count
0,Beirut,Beirut is the capital and largest city of Le...,12695,beirut is the capital and largest city of leba...,12121,1. Beirut is the capital and largest city of L...,2806
1,Doha,"Doha (Arabic: الدوحة, romanized: ad-Dawḥa [adˈ...",9016,doha (arabic romanized addawa [addua] or adda)...,8252,1. Doha is the capital city and financial hub ...,1645
2,Durban,"Durban ( DUR-bən) (Zulu: eThekwini, from ithek...",7992,durban ( durbn) (zulu ethekwini from itheku me...,7395,"1. Durban is located in KwaZulu-Natal, South A...",2151
3,Havana,Havana (; Spanish: La Habana [la aˈβana] ; Luc...,30000,havana ( spanish la habana [la aana] lucumi il...,28979,1) Havana is the capital and largest city of C...,6909
4,Kuala Lumpur,Kuala Lumpur (Malaysian pronunciation: [ˈkualə...,12584,kuala lumpur (malaysian pronunciation [kual a ...,12397,1. Kuala Lumpur is the capital city of Malaysi...,2721


In [75]:
rouge = Rouge()
def calculate_rouge_scores(row):
    cleaned_information = row['cleaned_information']
    summary = row['summary']
    scores = rouge.get_scores(summary, cleaned_information)
    return scores

In [69]:
df['rouge_scores'] = df.apply(calculate_rouge_scores, axis=1)

In [70]:
df[['wonder_city', 'cleaned_information', 'summary', 'rouge_scores']]

Unnamed: 0,wonder_city,cleaned_information,summary,rouge_scores
0,Beirut,beirut is the capital and largest city of leba...,1. Beirut is the capital and largest city of L...,"[{'rouge-1': {'r': 0.18871940404398724, 'p': 0..."
1,Doha,doha (arabic romanized addawa [addua] or adda)...,1. Doha is the capital city and financial hub ...,"[{'rouge-1': {'r': 0.17232640648758235, 'p': 0..."
2,Durban,durban ( durbn) (zulu ethekwini from itheku me...,"1. Durban is located in KwaZulu-Natal, South A...","[{'rouge-1': {'r': 0.19918074756784435, 'p': 0..."
3,Havana,havana ( spanish la habana [la aana] lucumi il...,1) Havana is the capital and largest city of C...,"[{'rouge-1': {'r': 0.22426397945070145, 'p': 0..."
4,Kuala Lumpur,kuala lumpur (malaysian pronunciation [kual a ...,1. Kuala Lumpur is the capital city of Malaysi...,"[{'rouge-1': {'r': 0.19829683698296838, 'p': 0..."
5,La Paz,la paz officially known as nuestra seora de la...,"1. La Paz is a capital city in Bolivia, locate...","[{'rouge-1': {'r': 0.19287671232876713, 'p': 0..."
6,Vigan,vigan officially the city of vigan (ilocano si...,1. Vigan is a 4th class component city and cap...,"[{'rouge-1': {'r': 0.21536252692031588, 'p': 0..."


In [81]:
df.head(10)

Unnamed: 0,wonder_city,information,num_tokens,cleaned_information,token_count,summary,summary_token_count,rouge_scores
0,Beirut,Beirut is the capital and largest city of Le...,12695,beirut is the capital and largest city of leba...,12121,1. Beirut is the capital and largest city of L...,2806,"[{'rouge-1': {'r': 0.18871940404398724, 'p': 0..."
1,Doha,"Doha (Arabic: الدوحة, romanized: ad-Dawḥa [adˈ...",9016,doha (arabic romanized addawa [addua] or adda)...,8252,1. Doha is the capital city and financial hub ...,1645,"[{'rouge-1': {'r': 0.17232640648758235, 'p': 0..."
2,Durban,"Durban ( DUR-bən) (Zulu: eThekwini, from ithek...",7992,durban ( durbn) (zulu ethekwini from itheku me...,7395,"1. Durban is located in KwaZulu-Natal, South A...",2151,"[{'rouge-1': {'r': 0.19918074756784435, 'p': 0..."
3,Havana,Havana (; Spanish: La Habana [la aˈβana] ; Luc...,30000,havana ( spanish la habana [la aana] lucumi il...,28979,1) Havana is the capital and largest city of C...,6909,"[{'rouge-1': {'r': 0.22426397945070145, 'p': 0..."
4,Kuala Lumpur,Kuala Lumpur (Malaysian pronunciation: [ˈkualə...,12584,kuala lumpur (malaysian pronunciation [kual a ...,12397,1. Kuala Lumpur is the capital city of Malaysi...,2721,"[{'rouge-1': {'r': 0.19829683698296838, 'p': 0..."
5,La Paz,"La Paz, officially known as Nuestra Señora de ...",8177,la paz officially known as nuestra seora de la...,7427,"1. La Paz is a capital city in Bolivia, locate...",1957,"[{'rouge-1': {'r': 0.19287671232876713, 'p': 0..."
6,Vigan,"Vigan, officially the City of Vigan (Ilocano: ...",5679,vigan officially the city of vigan (ilocano si...,5234,1. Vigan is a 4th class component city and cap...,1576,"[{'rouge-1': {'r': 0.21536252692031588, 'p': 0..."


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting termcolor
  Using cached termcolor-2.3.0-py3-none-any.whl (6.9 kB)
Installing collected packages: termcolor
Successfully installed termcolor-2.3.0


In [89]:



selected_columns = df[["wonder_city", "summary"]]

for index, row in selected_columns.iterrows():
    wonder_city = row["wonder_city"]
    summary = row["summary"]

    formatted_wonder_city = colored(wonder_city, "green", attrs=["bold", "underline"])
    
    formatted_summary = colored(f"Summary: {summary}", "black")
    
    print(formatted_wonder_city)
    
    print()
    
    print(formatted_summary)
    
    print("\n----------------------------------------------\n")


[4m[1m[32mBeirut[0m

[30mSummary: 1. Beirut is the capital and largest city of Lebanon with a population of 2.5 million people as of 2014.
    2. The city has been inhabited for over 5,000 years and was one of the oldest cities in the world.
    3. The first historical mention of Beirut is found in the Amarna letters from the New Kingdom of Egypt which date back to the 14th century BC.
    4. Beirut has been severely damaged by the Lebanese Civil War, the 2006 Lebanon War, and the 2020 massive explosion in the port of Beirut.
    5. The city's architectural and demographic structure has undergone major changes in recent decades.
1. The text describes the discovery of several archaeological sites in Beirut, Lebanon, including Beirut V (Nahr Beirut), Beirut VI (Patriarchate), Beirut VII (Rivoli and Byblos Cinemas), and Beirut 194.
    2. The sites were discovered by various archaeologists, including Henri Fleisch, Dillenseger, Lorraine Copeland, Peter Wescombe, Marina Hayek, and an 