# Summarise Documents

## Set up Azure OpenAI

In [1]:
# Alternative setup (disabled): build the AzureOpenAI client from a token
# obtained through ManagedIdentityCredential instead of an API key read from
# the environment. Uncomment this cell and fill in the "XXX" placeholders to
# use it in place of the key-based setup cell.
# NOTE(review): the token is fetched once when the cell runs — confirm whether
# it has to be refreshed for long-running sessions.
#
# import openai
# from openai import AzureOpenAI
# import os
# from azure.identity import ManagedIdentityCredential

# default_credential=ManagedIdentityCredential(client_id="XXX")
# token=default_credential.get_token("https://cognitiveservices.azure.com/.default")
# Resource_endpoint="XXX"

# client = AzureOpenAI(
#   azure_endpoint = Resource_endpoint,
#   api_key=token.token,
#   api_version="2023-05-15"
# )

In [1]:
import os
import openai
from openai import AzureOpenAI
from dotenv import load_dotenv

# Pull the Azure OpenAI settings from the local credentials file into the
# process environment before anything reads them.
load_dotenv("credentials.env")

# Module-level API type setting of the openai package.
openai.api_type = "azure"

# Shared client used by the request cells further down; the key and the
# endpoint are looked up in the environment populated above.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version="2024-02-01",
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
)

## Load Data

In [2]:
import pandas as pd

# Raw BBC news table: tab-separated, and no column is used as the index.
df_orig = pd.read_csv(
    "data/bbc-news-data.csv",
    sep='\t',
    index_col=False,
)

In [3]:
# Work on an independent copy so the frame loaded from disk stays untouched.
df = df_orig.copy(deep=True)
df

Unnamed: 0,category,filename,title,content
0,business,001.txt,Ad sales boost Time Warner profit,Quarterly profits at US media giant TimeWarne...
1,business,002.txt,Dollar gains on Greenspan speech,The dollar has hit its highest level against ...
2,business,003.txt,Yukos unit buyer faces loan claim,The owners of embattled Russian oil giant Yuk...
3,business,004.txt,High fuel prices hit BA's profits,British Airways has blamed high fuel prices f...
4,business,005.txt,Pernod takeover talk lifts Domecq,Shares in UK drinks and food firm Allied Dome...
...,...,...,...,...
2220,tech,397.txt,BT program to beat dialler scams,BT is introducing two initiatives to help bea...
2221,tech,398.txt,Spam e-mails tempt net shoppers,Computer users across the world continue to i...
2222,tech,399.txt,Be careful how you code,A new European directive could put software w...
2223,tech,400.txt,US cyber security chief resigns,The man making sure US computer networks are ...


## Create prompt

In [4]:
# Cue appended after the article text to ask for a short summary. The
# whitespace is spelled out so it matches the multi-line literal exactly.
prompt_postfix = " \n  \n\nTl;dr\n"

# Prompt for the first article: title, newline, body, then the cue.
prompt = "\n".join([df.at[0, 'title'], df.at[0, 'content']]) + prompt_postfix
prompt

'Ad sales boost Time Warner profit\n Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.  The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.  Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL\'s underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues. It hopes to increase subscribers by offering the online service free to TimeWarner internet customers and will try to sign up 

## Request to API

In [5]:
# Send the prompt as a single user message, with temperature 0 and the
# reply capped at 50 tokens.
response = client.chat.completions.create(
    messages=[
        {"role": "user", "content": prompt},
    ],
    model="gpt-4o",
    max_tokens=50,
    temperature=0,
)

# Print the text of the first returned choice, trimmed of surrounding whitespace.
print(response.choices[0].message.content.strip())


TimeWarner's quarterly profits surged 76% to $1.13bn, driven by strong ad sales and high-speed internet growth, despite a decline in AOL subscribers and Warner Bros' profit slump due to box-office flops like *Alexander*


### Putting the code together

In [7]:
# Summarise the first N_DOCS articles of `df`.
# `results` keeps the full index of `df`, so each summary lines up with its
# article by index label; rows that are not processed stay empty.
N_DOCS = 10
results = pd.DataFrame(columns=['summary'], index=df.index)

# Cue appended after the article text to ask for a short summary.
prompt_postfix = "\n\nTl;dr"

# Iterate over the index labels of the selected rows, not over positions, so
# the summary is written to the right row even when `df` does not carry a
# default 0..n-1 index.
articles = df.head(N_DOCS)
for idx, title, content in zip(articles.index, articles['title'], articles['content']):
    try:
        # The prompt is built inside the try so that a malformed row (for
        # example a missing, non-string body) is reported and skipped rather
        # than aborting the whole run.
        prompt = title + "\n" + content + prompt_postfix

        # Request API
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=50
        )
        results.at[idx, 'summary'] = response.choices[0].message.content.strip()
    except Exception as err:
        # Best effort: report the failure and continue with the next article.
        print(f"Unexpected error at index {idx}: {err} ({type(err)})")


### Results

In [8]:
# Preview the first ten summary rows.
results.iloc[:10]

Unnamed: 0,summary
0,TimeWarner's quarterly profits surged 76% to $...
1,The US dollar hit a nearly three-month high ag...
2,"Yukos' former production unit, Yugansk, was so..."
3,British Airways (BA) reported a 40% drop in pr...
4,Shares in UK drinks and food firm Allied Domec...
5,Japan narrowly avoided a technical recession w...
6,"The US added 146,000 jobs in January, below ex..."
7,"India's Finance Minister, Palaniappan Chidamba..."
8,Ethiopia's crop production increased by 24% in...
9,A US appeals court rejected a $280 billion law...


### Adding results to dataframe

In [9]:
# Put the first 10 articles and their summaries side by side; the two frames
# are aligned on their index labels.
df_results = pd.concat([df.head(10), results.head(10)], axis=1)

# Sanity check in place of the bare `.shape` expression, which was evaluated
# but never displayed: alignment must not add or drop rows.
assert len(df_results) == len(df.head(10)), "summaries are misaligned with the source rows"
df_results

Unnamed: 0,category,filename,title,content,summary
0,business,001.txt,Ad sales boost Time Warner profit,Quarterly profits at US media giant TimeWarne...,TimeWarner's quarterly profits surged 76% to $...
1,business,002.txt,Dollar gains on Greenspan speech,The dollar has hit its highest level against ...,The US dollar hit a nearly three-month high ag...
2,business,003.txt,Yukos unit buyer faces loan claim,The owners of embattled Russian oil giant Yuk...,"Yukos' former production unit, Yugansk, was so..."
3,business,004.txt,High fuel prices hit BA's profits,British Airways has blamed high fuel prices f...,British Airways (BA) reported a 40% drop in pr...
4,business,005.txt,Pernod takeover talk lifts Domecq,Shares in UK drinks and food firm Allied Dome...,Shares in UK drinks and food firm Allied Domec...
5,business,006.txt,Japan narrowly escapes recession,Japan's economy teetered on the brink of a te...,Japan narrowly avoided a technical recession w...
6,business,007.txt,Jobs growth still slow in the US,The US created fewer jobs than expected in Ja...,"The US added 146,000 jobs in January, below ex..."
7,business,008.txt,India calls for fair trade rules,"India, which attends the G7 meeting of seven ...","India's Finance Minister, Palaniappan Chidamba..."
8,business,009.txt,Ethiopia's crop production up 24%,Ethiopia produced 14.27 million tonnes of cro...,Ethiopia's crop production increased by 24% in...
9,business,010.txt,Court rejects $280bn tobacco case,A US government claim accusing the country's ...,A US appeals court rejected a $280 billion law...


## Save results

In [10]:
# Write the combined frame as a tab-separated file. The target directory is
# created first so the cell also works when the folder does not exist yet.
fname = 'output/summaries.csv'
os.makedirs(os.path.dirname(fname), exist_ok=True)
df_results.to_csv(fname, sep='\t')