In [14]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models
import os
import json
import pandas as pd

In [39]:

# Load the prompt catalogue from disk and select the template that is prepended
# to every text sent to the model.
# NOTE(review): the saved output of the sample cell further down shows a prompt
# mentioning an "aggressive risk tolerance", while the key selected here is the
# "conservative" one -- confirm prompts.json or re-run that cell.
with open('prompts.json', encoding='utf-8') as arq:
    prompts = json.loads(arq.read())

prompt_string = prompts["prompts"]["prompt_positive_negative_investor_context_conservative_en"]

In [40]:
# Read the CSV and extract its 'text' column as a plain Python list.
dataset = pd.read_csv('ML-ESG-2_English_Train_formatted.csv')
dataset_to_classify = dataset['text'].to_list()

In [41]:
def batches(lista, tamanho_lote):
    """Yield consecutive slices of ``lista`` holding at most ``tamanho_lote`` items.

    The final slice is shorter when ``len(lista)`` is not a multiple of
    ``tamanho_lote``; an empty ``lista`` yields nothing.
    """
    starts = range(0, len(lista), tamanho_lote)
    yield from (lista[start:start + tamanho_lote] for start in starts)

In [42]:
# Use the same BLOCK_ONLY_HIGH threshold for each of the four listed harm categories.
safety_settings = {
    category: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in (
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
}

# Generation options passed to generate_content() inside generate().
# The 20-token output cap means replies are cut off after a few words.
generation_config = dict(
    max_output_tokens=20,
    temperature=0,
    top_p=0.95,
)

def generate(prompt_instance):
  """Send one prompt to the Gemini model and return the reply as a single string.

  Args:
    prompt_instance: Full prompt text (instruction template plus the text to
      classify).

  Returns:
    The concatenated text of all streamed chunks. Chunks that carry no text
    are skipped, so the result may be an empty string.
  """
  vertexai.init(project="aida-412720", location="us-central1")
  model = GenerativeModel("gemini-1.0-pro")
  responses = model.generate_content(
      [prompt_instance],
      generation_config=generation_config,
      safety_settings=safety_settings,
      stream=True,
  )
  chunks = []
  for response in responses:
    try:
      chunks.append(response.text)
    except ValueError:
      # `.text` raises ValueError when a chunk has no text part (for example a
      # blocked candidate). One such chunk must not abort a long run over the
      # whole dataset, so keep whatever text did arrive.
      continue

  return "".join(chunks)

In [43]:

# Smoke test: classify the dataset entry at index 1 and show the prompt
# followed by the model's reply.
prompt_instance = "".join((prompt_string, dataset_to_classify[1]))
respons = generate(prompt_instance)
print(prompt_instance, respons, sep="\n")

Act as an investor with an aggressive risk tolerance. You are interested in staying informed about the market when making investment choices. Classify the sentiment of the following headline as either 'Positive' or 'Negative': The company also announced in december that it is opening three new at&t connected learning centers in los angeles to provide internet access and education tools for those who face connectivity barriers vital to their long-term success. at&t has already opened a center at the coalition for responsible community development’s youthsource center. the next opens today at the salvadoran american leadership & education fund (salef) and the vermont slauson edc technology training center is planned for later this year.
## Sentiment: Positive 

**Reasons:**

* **Expansion:** The company is opening three new


In [44]:
# Run identifier: it is appended to the names of the CSV files written and read by later cells.
resultSetNumber = 1

In [45]:
# Classify every text, 100 at a time, printing a progress line after each batch.
all_responses = []
i = 1
for batch in batches(dataset_to_classify, 100):
    # One model call per text; extend() appends the replies in dataset order.
    all_responses.extend(generate(prompt_string + item) for item in batch)

    print("batch number ", i)
    i += 1
    

batch number  1
batch number  2
batch number  3


In [46]:
# Put each text and its `label` value next to the raw model reply, then save the table.
df_predictions = pd.DataFrame(dict(
    text=dataset['text'].tolist(),
    label=dataset['label'].tolist(),
    response=all_responses,
))

predictions_csv = 'ML-ESG-2_English_Train_formatted_responses_pos_neg_investor_conservative_' + str(resultSetNumber) + '.csv'
df_predictions.to_csv(predictions_csv, index=False)

In [47]:
def detect_sentiment_word(text):
    """Map a free-text model reply to "positive", "negative" or "undetermined".

    Matching is case-insensitive. When both words occur, the one that appears
    first in the reply wins. The previous implementation always preferred
    "positive", so a reply such as "Negative. Nothing positive here" was
    mislabelled.

    Args:
        text: The raw reply. Non-string values (e.g. ``None`` or NaN) are
            treated as having no detectable sentiment.

    Returns:
        "positive", "negative", or "undetermined" when neither word is found.
    """
    if not isinstance(text, str):
        return "undetermined"

    text_lower = text.lower()
    # Position of the first occurrence of each keyword; -1 means "absent".
    positions = {label: text_lower.find(label) for label in ("positive", "negative")}
    found = {label: pos for label, pos in positions.items() if pos != -1}
    if not found:
        return "undetermined"
    # The two keywords cannot start at the same index, so the minimum is unique.
    return min(found, key=found.get)

In [48]:
# Reduce every raw reply to its sentiment label, keeping the original order.
responses_label = [detect_sentiment_word(response) for response in all_responses]

In [49]:
# Same table as the raw predictions, plus the label extracted from each reply.
df_predictions_label = pd.DataFrame(dict(
    text=dataset['text'].tolist(),
    label=dataset['label'].tolist(),
    response=all_responses,
    responseLabel=responses_label,
))

labelled_predictions_csv = 'ML-ESG-2_English_Train_formatted_responses_pos_neg_investor_conservative_with_label_' + str(resultSetNumber) + '.csv'
df_predictions_label.to_csv(labelled_predictions_csv, index=False)

In [50]:

# Reload the labelled predictions for this run and count each
# (label, responseLabel) combination.
labelled_csv = 'ML-ESG-2_English_Train_formatted_responses_pos_neg_investor_conservative_with_label_' + str(resultSetNumber) + '.csv'
responses = pd.read_csv(labelled_csv)

frequency_matrix = pd.crosstab(
    index=[responses[column] for column in ('label', 'responseLabel')],
    columns='count',
)

frequency_matrix

Unnamed: 0_level_0,col_0,count
label,responseLabel,Unnamed: 2_level_1
Opportunity,negative,10
Opportunity,positive,181
Risk,negative,20
Risk,positive,6
Risk,undetermined,1


In [53]:

# Rebinds `responses` to a different, combined results file; the crosstab cells
# below read its `label` and `responseLabel_*` columns.
# NOTE(review): no cell visible in this notebook writes this file -- confirm where it is produced.
responses = pd.read_csv('ML-ESG-2_English_Train_formatted_responses_pos_neg_investors_with_label_all.csv')

In [54]:

# Count each (label, responseLabel_1) combination; the count column is named 'default'.
frequency_matrix_default = pd.crosstab(
    index=[responses[column] for column in ('label', 'responseLabel_1')],
    columns='default',
)

frequency_matrix_default

Unnamed: 0_level_0,col_0,default
label,responseLabel_1,Unnamed: 2_level_1
Opportunity,negative,8
Opportunity,positive,182
Opportunity,undetermined,1
Risk,negative,37
Risk,positive,4


In [55]:

# Count each (label, responseLabel_conservative) combination; the count column is named 'conservative'.
frequency_matrix_conservative = pd.crosstab(
    index=[responses[column] for column in ('label', 'responseLabel_conservative')],
    columns='conservative',
)

frequency_matrix_conservative


Unnamed: 0_level_0,col_0,conservative
label,responseLabel_conservative,Unnamed: 2_level_1
Opportunity,negative,12
Opportunity,positive,178
Opportunity,undetermined,1
Risk,negative,37
Risk,positive,4


In [56]:

# Count each (label, responseLabel_moderate) combination; the count column is named 'moderate'.
frequency_matrix_moderate = pd.crosstab(
    index=[responses[column] for column in ('label', 'responseLabel_moderate')],
    columns='moderate',
)

frequency_matrix_moderate

Unnamed: 0_level_0,col_0,moderate
label,responseLabel_moderate,Unnamed: 2_level_1
Opportunity,negative,7
Opportunity,positive,183
Opportunity,undetermined,1
Risk,negative,37
Risk,positive,4


In [57]:

# Count each (label, responseLabel_aggressive) combination; the count column is named 'aggressive'.
frequency_matrix_aggressive = pd.crosstab(
    index=[responses[column] for column in ('label', 'responseLabel_aggressive')],
    columns='aggressive',
)

frequency_matrix_aggressive

Unnamed: 0_level_0,col_0,aggressive
label,responseLabel_aggressive,Unnamed: 2_level_1
Opportunity,negative,10
Opportunity,positive,181
Risk,negative,27
Risk,positive,13
Risk,undetermined,1
