In [4]:
from dotenv import load_dotenv
import os
import google.generativeai as genai
import google.ai.generativelanguage as glm
import textwrap
from datasets import load_dataset
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Load environment variables from a local .env file (python-dotenv).
# Presumably this is where GOOGLE_KEY, read from os.environ further down, is
# defined — confirm. The cell output is load_dotenv()'s return value.
load_dotenv()

True

In [3]:
# Load the AG News dataset; the 'train' and 'test' splits are used below.
dataset = load_dataset('ag_news')

In [4]:
# Convert both splits to pandas DataFrames (columns: text, label).
# Dataset.to_pandas() is the library's own conversion and avoids having pandas
# iterate the dataset one row at a time, as pd.DataFrame(dataset[...]) does.
df_train = dataset['train'].to_pandas()
df_test = dataset['test'].to_pandas()

In [5]:
# Preview the first 10 training rows (columns: text, label).
df_train.head(10)

Unnamed: 0,text,label
0,Wall St. Bears Claw Back Into the Black (Reute...,2
1,Carlyle Looks Toward Commercial Aerospace (Reu...,2
2,Oil and Economy Cloud Stocks' Outlook (Reuters...,2
3,Iraq Halts Oil Exports from Main Southern Pipe...,2
4,"Oil prices soar to all-time record, posing new...",2
5,"Stocks End Up, But Near Year Lows (Reuters) Re...",2
6,Money Funds Fell in Latest Week (AP) AP - Asse...,2
7,Fed minutes show dissent over inflation (USATO...,2
8,Safety Net (Forbes.com) Forbes.com - After ear...,2
9,Wall St. Bears Claw Back Into the Black NEW Y...,2


In [6]:
# Configure the Gemini client with the API key from the environment.
# os.environ[...] raises KeyError if GOOGLE_KEY is not set.
genai.configure(api_key=os.environ['GOOGLE_KEY'])
# Plain text model used for the free-form prompt below.
# NOTE: `model` is rebound later in this notebook to a tool-enabled model.
model = genai.GenerativeModel('gemini-pro')

In [12]:
# Send a free-form prompt to the model — presumably a quick check that the API
# key and model are working before moving on to classification.
response = model.generate_content("Write a story about a magic backpack.")

In [13]:
# Show the text of the generated response.
print(response.text)

In the bustling town of Willow Creek, amidst the quaint shops and cobblestone streets, there existed a remarkable backpack—a backpack imbued with extraordinary magic.

Crafted from ancient, shimmering fabric, the backpack possessed an unassuming appearance. But within its four compartments resided secrets that defied all logic.

Emily, a curious and imaginative 12-year-old, stumbled upon the backpack at a dusty antique shop. Intrigued by its enigmatic presence, she decided to take it home. Little did she know that her life was about to change forever.

As Emily unzipped the first compartment, a brilliant array of colors erupted before her eyes. Books of all shapes and sizes floated weightlessly, their pages turning by themselves—a testament to the backpack's unspoken knowledge.

With a whisper of excitement, Emily reached for the second compartment. Suddenly, she was enveloped in a warm breeze that carried the scent of blooming flowers. Clothes of every imaginable style and color dance

In [30]:
# Schema of the structured answer: an object with one mandatory string field,
# 'classification_label', which carries the predicted category name.
classification = glm.Schema(
    type=glm.Type.OBJECT,
    required=['classification_label'],
    properties=dict(
        classification_label=glm.Schema(type=glm.Type.STRING),
    ),
)

In [31]:
# Tool declaration offered to Gemini so that the predicted category comes back
# as a structured function call instead of free text. The call takes a single
# object argument, 'classification', shaped by the `classification` schema.
classify_news_article = glm.FunctionDeclaration(
    name="classify_news_article",
    description=textwrap.dedent("""\
        Classify the news article into one of the five categories: world, sports, business, technology, science. 
        """),
    parameters=glm.Schema(
        type=glm.Type.OBJECT,
        properties = {
            'classification': classification
        },
        # Mark the argument as mandatory: the code that reads the function call
        # indexes ['args']['classification'] directly and would fail with a
        # KeyError if the model left it out.
        required=['classification']
    )
)

In [32]:
# Rebind `model` to a 'gemini-1.0-pro' instance that is given the
# classify_news_article tool. This replaces the plain 'gemini-pro' model created
# earlier; every later generate_content call in this notebook uses this one.
model = genai.GenerativeModel(model_name='gemini-1.0-pro', tools = [classify_news_article])

In [11]:
# Use the text of the first test row as a sample input.
article = df_test.iloc[0].text

In [12]:
# Display the sample article text.
article

"Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."

In [33]:
# Ask the tool-enabled model to classify the sample article. The article text is
# interpolated at the end of the prompt; the answer is read from the response's
# function call in the next cell.
result = model.generate_content(f"""
    Please classify the provided news article into one of the following categories: world, sports, business, technology, science. Do not return null as a classification label, find the category closest to the provided article. 

{article}
""")

In [34]:
# Take the function call from the first part of the first candidate.
fc = result.candidates[0].content.parts[0].function_call
# Convert the message to a plain dict via its class-level to_dict and print it;
# the output shows the shape {'name': ..., 'args': {'classification': {...}}}.
print(type(fc).to_dict(fc))

{'name': 'classify_news_article', 'args': {'classification': {'classification_label': 'business'}}}


In [81]:
# Integer id returned for each label name the model may produce.
# 'technology' and 'science' deliberately share id 3 — presumably because the
# dataset has one combined science/technology class; confirm against the
# AG News dataset card.
_LABEL_TO_ID = {
    'world': 0,
    'sports': 1,
    'business': 2,
    'technology': 3,
    'science': 3,
}


def get_gemini_classification(news_text):
    """Classify a news text with Gemini and return its integer label id.

    Sends ``news_text`` to the module-level, tool-enabled ``model`` and reads
    the label from the ``classify_news_article`` function call in the response.

    Args:
        news_text: Article or headline text to classify.

    Returns:
        0 (world), 1 (sports), 2 (business) or 3 (technology / science), or
        ``None`` when the response holds no candidate, no function-call
        arguments, or a label outside the known set.

    Side effects:
        Prints the input text and the label extracted from the response.
    """
    print(news_text)

    result = model.generate_content(f"""
    I will be giving a news article or a headline. You will have to classify that into one of the following categories. Choose world if no other category is appropriate. Return only the classification label.\n
    Categories:\n
    - world\n
    - sports\n
    - business\n
    - technology\n
    - science\n

    Article: {news_text}
    """)

    # A response may carry no candidates or no parts; treat that as
    # "no prediction" rather than failing with an IndexError.
    candidates = result.candidates
    if not candidates or not candidates[0].content.parts:
        return None

    fc_result = candidates[0].content.parts[0].function_call
    # When the model answers with plain text instead of a function call, the
    # args are empty, so look the label up defensively instead of indexing.
    args = type(fc_result).to_dict(fc_result).get('args') or {}
    label = (args.get('classification') or {}).get('classification_label')
    print(label)

    if not isinstance(label, str):
        return None
    # Normalise case and surrounding whitespace so e.g. "Business" still maps.
    return _LABEL_TO_ID.get(label.strip().lower())

In [82]:
# Spot-check the helper on the test row at position 17; the cell shows the
# printed article, the printed label and the returned integer id.
get_gemini_classification(df_test.iloc[17].text)

Mars Rovers Relay Images Through Mars Express European Space Agency -- ESAs Mars Express has relayed pictures from one of NASA's Mars rovers for the first time, as part of a set of interplanetary networking demonstrations.     The demonstrations pave the way for future Mars missions to draw on joint interplanetary networking capabilities...
science


3

We will now compute the accuracy and the weighted F1 score from the saved predictions.

In [5]:
# Path to the saved predictions CSV, relative to the working directory.
# NOTE(review): the code that produced this file is not visible in this section.
res_df_path = '../../data/gemini_text_classification.csv'

In [6]:
# Load the saved predictions (columns shown below: index, text,
# predicted_label, actual_label, execution_time).
res_df = pd.read_csv(res_df_path)

In [7]:
# Preview up to the first 50 result rows.
res_df.head(50)

Unnamed: 0,index,text,predicted_label,actual_label,execution_time
0,0,Fears for T N pension after talks Unions repre...,2.0,2,3.146692
1,1,The Race is On: Second Private Team Sets Launc...,3.0,3,1.38731
2,2,Ky. Company Wins Grant to Study Peptides (AP) ...,3.0,3,1.677121
3,3,Prediction Unit Helps Forecast Wildfires (AP) ...,3.0,3,1.477643
4,4,Calif. Aims to Limit Farm-Related Smog (AP) AP...,0.0,3,1.405219
5,5,Open Letter Against British Copyright Indoctri...,2.0,3,1.549904
6,6,"Loosing the War on Terrorism \\""Sven Jaschan, ...",3.0,3,1.447161
7,7,"FOAFKey: FOAF, PGP, Key Distribution, and Bloo...",3.0,3,1.641654
8,8,E-mail scam targets police chief Wiltshire Pol...,0.0,3,1.690559
9,9,"Card fraud unit nets 36,000 cards In its first...",2.0,3,1.365808


In [8]:
# Replace missing predictions with 0, the id get_gemini_classification returns
# for 'world' (the prompt's stated fallback category). Missing values presumably
# come from calls that produced no recognised label — confirm. Note that this
# counts those rows as 'world' predictions in the metrics below.
res_df['predicted_label'] = res_df['predicted_label'].fillna(0)

In [9]:
# Preview the results again after filling missing predictions.
res_df.head(20)

Unnamed: 0,index,text,predicted_label,actual_label,execution_time
0,0,Fears for T N pension after talks Unions repre...,2.0,2,3.146692
1,1,The Race is On: Second Private Team Sets Launc...,3.0,3,1.38731
2,2,Ky. Company Wins Grant to Study Peptides (AP) ...,3.0,3,1.677121
3,3,Prediction Unit Helps Forecast Wildfires (AP) ...,3.0,3,1.477643
4,4,Calif. Aims to Limit Farm-Related Smog (AP) AP...,0.0,3,1.405219
5,5,Open Letter Against British Copyright Indoctri...,2.0,3,1.549904
6,6,"Loosing the War on Terrorism \\""Sven Jaschan, ...",3.0,3,1.447161
7,7,"FOAFKey: FOAF, PGP, Key Distribution, and Bloo...",3.0,3,1.641654
8,8,E-mail scam targets police chief Wiltshire Pol...,0.0,3,1.690559
9,9,"Card fraud unit nets 36,000 cards In its first...",2.0,3,1.365808


In [10]:
# Sum of the execution_time column across all rows.
# The outer quotes are double so the f-string also parses on Python < 3.12,
# where reusing the enclosing quote character inside {...} is a SyntaxError.
print(f"Total execution time {res_df['execution_time'].sum()} seconds")

Total execution time 11051.72746575797 seconds


In [11]:
# Accuracy of the predicted labels against the actual labels.
print(f"Test Accuracy: {accuracy_score(res_df['actual_label'], res_df['predicted_label'])}")

Test Accuracy: 0.8202906279162778


In [12]:
# F1 score of the predicted labels against the actual labels, using the
# 'weighted' averaging mode across classes.
print(f"Test f-score: {f1_score(res_df['actual_label'], res_df['predicted_label'], average='weighted')}")

Test f-score: 0.817585267475348
