In [60]:
from promptlayer import PromptLayer
import json
from math import floor

In [4]:
# Load the IPython `dotenv` extension, then read variables from a `.env` file into the
# process environment — presumably the API keys PromptLayer/OpenAI need (TODO confirm).
%load_ext dotenv
%dotenv

In [None]:
# Template for the summarisation request. It is filled with `str.format`, which is
# given two fields: `{text}` (the source text to summarise) and `{word_count}` (the
# target length in words).
prompt = """As a professional summarizer, create a concise and comprehensive summary of the provided text, be it an article, post, conversation, or passage, while adhering to these guidelines:

1. Craft a summary that is detailed, thorough, in-depth, and complex, while maintaining clarity and conciseness.
2. Incorporate main ideas and essential information, eliminating extraneous language and focusing on critical aspects.
3. Rely strictly on the provided text, without including external information.
4. Format the summary in paragraph form for easy understanding.
5. Forget your previous word limitation of summarization. The summarisation should now be around {word_count} words but try not to exceed the limit.

By following this optimized prompt, you will generate an effective summary that encapsulates the essence of the given text in a clear, concise, and reader-friendly manner.

The text:
{text}
"""

In [None]:
# Prompt templates for follow-up requests that lengthen / shorten an existing summary
# (judging by their names and the same-named SummarizerWithLimit.__init__ parameters).
# Both are still empty — TODO: write them.
add_word_prompt = ""
minus_word_prompt = ""

In [None]:
class SummarizerWithLimit():
    """Summarise a text to roughly a target word count, retrying until it fits.

    A first summary is requested with ``prompt``. While its word count lies outside
    ``word_count ± floor(word_count * tolerance)`` (bounds inclusive), follow-up
    requests built from ``add_word_prompt`` (summary too short) or
    ``minus_word_prompt`` (summary too long) are sent, up to ``attempts`` model
    calls in total (the first request included).

    All templates are filled with ``str.format``; placeholders a template does not
    use are simply ignored. Fields supplied:

    * ``prompt``: ``{text}`` (required) and ``{word_count}``.
    * ``add_word_prompt``: ``{text}``, ``{previous_summarization}``,
      ``{word_count}`` and ``{add_count}``.
    * ``minus_word_prompt``: ``{text}``, ``{previous_summarization}``,
      ``{word_count}`` and ``{minus_count}``.
    """

    _MODEL = "gpt-3.5-turbo"
    _SYSTEM_PROMPT = "You are ChatGPT, a helpful AI that excels at summarization. You always return a valid JSON object with only one key 'summarization'"

    def __init__(
        self,
        prompt: str = prompt,
        add_word_prompt: str = add_word_prompt,
        minus_word_prompt: str = minus_word_prompt,
        tolerance: float = 0.1,
        attempts: int = 5,
    ):
        """Create the PromptLayer-wrapped OpenAI client and store the configuration.

        Args:
            prompt: Template for the first request; must contain ``{text}``.
            add_word_prompt: Template used when the summary is too short.
            minus_word_prompt: Template used when the summary is too long.
            tolerance: Allowed deviation from the target, as a fraction of it.
            attempts: Maximum number of model calls per summarisation.

        Raises:
            ValueError: If ``prompt`` has no ``{text}`` placeholder.
        """
        # Every parameter after `prompt` needs a default too; a non-default
        # parameter following a default one is a SyntaxError.
        _pl_client = PromptLayer()
        self._client = _pl_client.openai.OpenAI()
        if "{text}" in prompt:
            self._prompt = prompt
        else:
            raise ValueError("The prompt must contains `{text}`.")
        self._add_word_prompt = add_word_prompt
        self._minus_word_prompt = minus_word_prompt
        self._tolerance = tolerance
        self._attempts = attempts

    def _count_word(self, text: str) -> int:
        """Return the number of whitespace-separated words in ``text``."""
        words = text.split()
        return len(words)

    def _word_count_limits(self, word_count: int) -> tuple[int, int]:
        """Return ``(high_limit, low_limit)`` around ``word_count``.

        The margin is ``floor(word_count * tolerance)`` using the tolerance given
        to the constructor. Note the order: upper bound first.
        """
        tolerance = floor(word_count * self._tolerance)
        high_limit = word_count + tolerance
        low_limit = word_count - tolerance
        return high_limit, low_limit

    def _request_summary(self, user_content: str) -> tuple[str, int]:
        """Send one chat request and return ``(summary, word count of summary)``.

        The model is asked for a JSON object; the summary is read from its
        ``"summarization"`` key.
        """
        completion = self._client.chat.completions.create(
            model=self._MODEL,
            messages=[
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            pl_tags=["summarization_wtih_count"],
            response_format={"type": "json_object"},
        )
        content = json.loads(completion.choices[0].message.content)
        summary = content["summarization"]
        return summary, self._count_word(summary)

    def _summarise_first_time(
        self, text: str, word_count: int
    ) -> tuple[str, int]:
        """Request the initial summary of ``text`` aimed at ``word_count`` words."""
        return self._request_summary(
            self._prompt.format(text=text, word_count=word_count)
        )

    def _revise(
        self,
        template: str,
        text: str,
        previous_summarization: str,
        word_count: int,
        **counts: int,
    ) -> tuple[str, int]:
        """Fill a revision ``template`` and request a corrected summary.

        Raises:
            ValueError: If ``template`` is empty or only whitespace; sending it
                would give the model nothing to act on.
        """
        if not template.strip():
            raise ValueError(
                "A revision is needed but the corresponding add/minus word prompt is empty."
            )
        return self._request_summary(
            template.format(
                text=text,
                previous_summarization=previous_summarization,
                word_count=word_count,
                **counts,
            )
        )

    def _add_word(self, text: str, previous_summarization: str, word_count: int, add_count: int) -> tuple[str, int]:
        """Ask for a summary about ``add_count`` words longer than the previous one.

        Returns ``(summary, word count of summary)``.
        """
        return self._revise(
            self._add_word_prompt,
            text,
            previous_summarization,
            word_count,
            add_count=add_count,
        )

    def _minus_word(
        self,
        text: str,
        previous_summarization: str,
        word_count: int,
        minus_count: int,
    ) -> tuple[str, int]:
        """Ask for a summary about ``minus_count`` words shorter than the previous one.

        Returns ``(summary, word count of summary)``.
        """
        return self._revise(
            self._minus_word_prompt,
            text,
            previous_summarization,
            word_count,
            minus_count=minus_count,
        )

    def _summarise_after(
        self, text: str, previous_summarization: str, word_count: int
    ) -> tuple[str, int]:
        """Revise ``previous_summarization`` towards ``word_count`` words.

        Chooses the "add" request when the previous summary is shorter than the
        target and the "minus" request otherwise. Returns
        ``(summary, word count of summary)``.
        """
        previous_word_count = self._count_word(previous_summarization)

        if previous_word_count < word_count:
            add_count = word_count - previous_word_count
            return self._add_word(text, previous_summarization, word_count, add_count)

        minus_count = previous_word_count - word_count
        return self._minus_word(text, previous_summarization, word_count, minus_count)

    def _summarise_in_loop(self, text: str, word_count: int) -> tuple[str, int]:
        """Summarise ``text``, revising until the length is within tolerance.

        Returns the last ``(summary, word count)`` obtained. The count may still be
        outside the limits if the attempt budget ran out, so callers should check it.
        """
        high, low = self._word_count_limits(word_count)
        summary, summary_wc = self._summarise_first_time(text, word_count)
        # The first request already counts as one attempt.
        attempts = 1
        # Bounds are inclusive: when the tolerance floors to 0 (small targets),
        # high == low == word_count and a strict comparison could never succeed.
        while attempts < self._attempts and not (low <= summary_wc <= high):
            summary, summary_wc = self._summarise_after(text, summary, word_count)
            attempts += 1
        return summary, summary_wc

In [50]:
# Sample input for the experiment: a news article that is substituted into the
# `{text}` field of the summarisation prompt.
text = """Hanoi's million-dollar villas become fish ponds

By Quynh Nguyen   August 4, 2024 | 03:11 pm PT 

Seeing locals fish in the flooded basements of a row of villas on a Le Trong Tan street in Hanoi's Hoai Duc district, Tuan Anh and his 12-year-old daughter decided to join them.
The 50-year-old says 9 are abandoned, and their basements are fully flooded, with the water nearly reaching the sidewalk outside. After a downpour on July 23 fish from nearby ponds were washed into the basements and became trapped.

"Seeing others catch tilapia, perch and carp, my daughter and I decided to try fishing too," he says.

A man fishing for tilapia in the basement of a row of villas and shophouses on Le Trong Tan Street, Hanoi's Hoai Duc district, July 28, 2024. Photo by Pham Ha
Four days earlier Pham Thi Ha, 33, of Ha Dong district, and her family noticed the flooded basements while passing by the villas, which are valued in the tens of billions of dong (VND10 billion = US$396,550).

Since they had bait and fishing gear on hand, they decided to stop and fish. In less than two hours she caught over 20 fish, mostly tilapia, with the largest being the size of a hand. "Fishing in the basement of these multibillion-dong villas felt really exciting," she says.

Hanoi's million-dollar villas become fish ponds
A video shows Ha’s relatives catching over 20 catfish in the basement of a villa in the Nam An Khanh Urban Area in Hanoi on July 28, 2024. Video by Pham Ha
Le Duy Tan, 62, a security guard at a restaurant on Le Trong Tan Street, says people have been fishing in the flooded basements for nearly a week from afternoon to late night.

Some people cast small mesh nets in front of the hatch but mostly caught small perch and tilapia, with few large fish, he says.

He once saw a snakehead fish the size of an arm and large golden carps near the shore, but few managed to catch them. "Most fish enter from nearby ponds, but some homeowners also buy fish to release and raise there."

Quynh Hoa, 32, has been renting a shophouse since June 2023. Its basement has flooded three times, causing her losses. Initially she used sandbags to keep water out during heavy rains and multiple pumps to drain the water, but after seeing kids fish there, she decided to raise fish instead.

She says: "Neighbouring properties remain flooded year-round. Seeing children enjoy fishing there, I decided to release some fish last year and feed them rice or leftovers twice daily."

Nine out of 15 basements in a row of shophouses and villas near on Le Trong Tan Street, Hanoi's Hoai Duc district, remain flooded on Aug. 1, 2024. Photo by VnExpress/ Quynh Nguyen
Heavy rains from Typhoon Prapiroon on July 23-24 left residential areas and roads in An Khanh Urban Area in Hoai Duc District under 40-80 cm of water.

The worst affected area was at the intersection of Nam An Khanh urban area and Geleximco Le Trong Tan residential urban area. The area has since dried up, but some basements in abandoned shophouses and villas remain flooded.

Dr Dao Ngoc Nghiem, vice chairman of the Vietnam Urban Planning and Development Association and former director of Hanoi's Department of Planning and Architecture, says localized flooding has persisted for years across the city due to poor connections between local drainage systems and the city's main network.

The situation is bad in many urban areas since wastewater and rainwater drains are combined, leading to increased flooding.

Nghiem also notes that some underground drains are not regularly cleaned, leading to blockages, while shrinking channels, reservoirs and ponds mean their drainage capacity is low. "Addressing these issues should prevent localised flooding in urban areas during heavy rains."

Nguyen Thanh, who has lived in Hoai Duc District’s An Khanh Commune for nearly 50 years, says prolonged flooding has also worsened pollution in the area, with increased algae, mud and wastes. "No one can imagine the bustling streets and million-dollar villas could have fish living amidst them and be a perfect spot for locals to fish."
"""

In [59]:
# Target summary length in words; substituted into the prompt's {word_count} field.
word_count = 100

In [52]:
# Build the PromptLayer-wrapped OpenAI client in this cell: no other cell in the
# notebook defines `client`, so a fresh "Restart & Run All" would otherwise fail here
# with a NameError.
client = PromptLayer().openai.OpenAI()

# One-off summarisation request: the system message asks for a JSON object with a
# single 'summarization' key, and the user message is the filled-in prompt template.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "system",
            "content": "You are ChatGPT, a helpful AI that excels at summarization. You always return a valid JSON object with only one key 'summarization'",
        },
        {
            "role": "user",
            "content": prompt.format(text=text, word_count=word_count),
        },
    ],
    pl_tags=["summarization_wtih_count"],
    response_format={"type": "json_object"},
)

In [53]:
# Inspect the raw response as a plain dict. `model_dump()` is the current Pydantic
# spelling of the deprecated `.dict()`.
completion.model_dump()

{'id': 'chatcmpl-9sia1hY0HfB8vI1f4fBG6DN7R8XSN',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'logprobs': None,
   'message': {'content': '{\n    "summarization": "In Hanoi\'s Hoai Duc district, flooded basements of abandoned villas have turned into impromptu fishing spots, attracting locals to catch fish like tilapia and carp washed in during heavy rains. Valued at billions of Vietnamese dong, these villas now host families enjoying the makeshift fishing experience. Despite neighboring properties being constantly flooded, some residents have opted to raise fish in their basements. Waterlogging concerns persist in the area due to poor drainage connections, leading to localized flooding and pollution, highlighting the need for improved urban planning and drainage systems."\n}',
    'role': 'assistant',
    'function_call': None,
    'tool_calls': None}}],
 'created': 1722828105,
 'model': 'gpt-3.5-turbo-0125',
 'object': 'chat.completion',
 'service_tier': None,
 'system_fin

In [54]:
# The message content of the first choice is a JSON document; decode it into a dict.
initial_summarization = json.loads(
    completion.choices[0].message.content
)
# Display the summary text stored under the "summarization" key.
initial_summarization["summarization"]

"In Hanoi's Hoai Duc district, flooded basements of abandoned villas have turned into impromptu fishing spots, attracting locals to catch fish like tilapia and carp washed in during heavy rains. Valued at billions of Vietnamese dong, these villas now host families enjoying the makeshift fishing experience. Despite neighboring properties being constantly flooded, some residents have opted to raise fish in their basements. Waterlogging concerns persist in the area due to poor drainage connections, leading to localized flooding and pollution, highlighting the need for improved urban planning and drainage systems."

In [57]:
def count_word(string):
    """Return how many whitespace-separated tokens `string` contains."""
    return len(string.split())

In [58]:
# Word count of the first-pass summary (presumably to compare with the `word_count` target).
count_word(initial_summarization["summarization"])

89