<a href="https://colab.research.google.com/github/ykim71/google_toxicity/blob/main/google_toxicity_update.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load File via GDrive



In [None]:
"""
Run this cell to mount your Google Drive; you will be asked to grant access with your Google account.
This gives Colab direct access to the files in your Google Drive.
Alternatively, upload the file from your local machine.
"""
from google.colab import drive

# Mount Google Drive at /content/drive.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
"""
This code changes Colab's working directory. I created a 'toxicity' folder in my Google Drive and use it as the working directory,
so data can be loaded from and saved to the 'toxicity' folder in my Google Drive.
"""
%cd drive/'MyDrive'/toxicity/

/content/drive/MyDrive/toxicity


## load your file to Colab

In [None]:
"""
Load your file into Colab using the following code. Replace 'sample_text.csv' with your file name.
I named my data 'sample_text'; you can replace it with another name, but later cells refer to it by this name.
"""

import pandas as pd

# The relative file name is resolved against the current working directory.
sample_text = pd.read_csv('sample_text.csv') # or the path of an uploaded file, e.g. /content/sample_code_review.csv


In [None]:
"""
Take 10 random rows to check that the data has loaded successfully; 'text' is the column that you want to analyze.
"""

# Last expression of the cell, so the 10 sampled rows are displayed as its output.
sample_text.sample(10)

Unnamed: 0,id,case,text
700,701,11308,IMPORTANT ELECTIONS ARE COMING UP THIS NOVEMBE...
597,598,9597,It was wonderful to be with members of the com...
4,5,35,More apparent Defunding Police Sentiment bias ...
785,786,12749,Prayers for Officer Sepulveda This is a re-sha...
351,352,5569,"300300 f MAY 4, 2022 INTERNATIONAL FIREFIGHTER..."
811,812,13121,What an honor to have the endorsement of Gover...
360,361,5669,Take her back to the coal mine to be charged. ...
795,796,12907,Greg AbbottBiden's inflation is crushing Ameri...
337,338,5374,"Happy Earth Day! Now more than ever, it’s impo..."
665,666,10807,I agree w/Sen. Joe Manchin. Biden Administrati...


# Perspective API toxicity



> Language Attributes: https://developers.perspectiveapi.com/s/about-the-api-attributes-and-languages



> API Request: https://developers.perspectiveapi.com/s/docs-get-started (note UT Google Account may not work; recommend using personal Google account for request)



In [None]:
"""
load packages/libraries
"""
from googleapiclient import discovery
from googleapiclient.errors import HttpError


In [None]:
"""
Enter your API key here.
"""
import os

# Prefer the PERSPECTIVE_API_KEY environment variable so the real key is never
# saved in (or committed with) the notebook. If the variable is not set, the
# placeholder below is used; replace it with your key in that case.
API_KEY = os.environ.get('PERSPECTIVE_API_KEY', 'your-api-key')


In [None]:
"""
Run this code if you want to score text data on 4 measures: Toxicity, Likely to Reject, Insult, and Identity Attack.
See the comments below for other attributes and their descriptions in detail.

"""
# Request template sent to the Perspective API for every comment.
# attribute descriptions: https://github.com/conversationai/perspectiveapi
# you can replace the toxicity attributes here:
analyze_request = {
   'comment': {'text': 'xx'}, # placeholder; the real text is filled in before each request
   'requestedAttributes': {'TOXICITY@6': {}, # see the actual attribute names on the Perspective API page
                           'LIKELY_TO_REJECT@2': {},
                           'INSULT': {},
                           'IDENTITY_ATTACK': {}
                           },
   'doNotStore': True, # for other settings, https://developers.perspectiveapi.com/s/about-the-api-methods
   'languages': ['en'] # the API defines this field as a list of language codes, not a bare string
}



In [None]:
# for a single text
import json

def incivility_measures(text, service=None):
  """Score a single text with the Perspective API and print the four measures.

  Args:
    text: The comment text to analyze.
    service: Optional, already-built API client (the object returned by
      ``discovery.build``). When omitted, a new client is built from
      ``API_KEY`` on every call.

  Returns:
    None. The text and its toxicity, reject, insult and identity scores are
    printed.

  Raises:
    Request failures raised by ``execute()`` (e.g. ``HttpError``) are not
    caught here and propagate to the caller.
  """
  if service is None:
    service = discovery.build(
      "commentanalyzer",
      "v1alpha1",
      developerKey=API_KEY,
      discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
      static_discovery=False,
    )

  # Build a per-call body instead of writing the text into the shared
  # module-level template, so one call never leaves its text behind for others.
  request_body = {
    **analyze_request,
    'comment': {**analyze_request['comment'], 'text': text},
  }

  # execute() already returns the parsed response, so no JSON round-trip is needed.
  response = service.comments().analyze(body=request_body).execute()
  scores = response['attributeScores']

  toxicity = scores['TOXICITY@6']['summaryScore']['value']
  reject = scores['LIKELY_TO_REJECT@2']['summaryScore']['value']
  insult = scores['INSULT']['summaryScore']['value']
  identity = scores['IDENTITY_ATTACK']['summaryScore']['value']

  # str(text) keeps the report from failing when the input is not a string.
  print("text:" + str(text) + "\ntoxicity:" + str(toxicity) + "\nreject:" + str(reject) + "\ninsult:" + str(insult) + "\nidentity:" + str(identity))


In [None]:

# Example 1: score a hand-written comment.
text = "To all those thinking about voting for Trump, remember...  IN YOUR HEART... YOU KNOW HE'S SHIT"
incivility_measures(text)

# Example 2: score the entry labelled 1 in the 'text' column of the loaded data.
text = sample_text['text'][1]
incivility_measures(text)


text:Just Say No To Woke: ‘Top Gun: Maverick’ Crushing Box Office With Pro-America Messaging Is A Wake-Up Call For Hollywood.
toxicity:0.2062748
reject:0.4401471
insult:0.02567133
identity:0.009175468


In [None]:
"""
For large text: 
assign your column name (that contain text data you want to analyze) in the code
"""
import csv
import codecs
import json
import time
import pandas as pd

# Output column -> Perspective API attribute it is read from.
# To score more attributes, request them in `analyze_request` and add them here.
score_attributes = {
  'toxicity': 'TOXICITY@6',
  'reject': 'LIKELY_TO_REJECT@2',
  'insult': 'INSULT',
  'attack': 'IDENTITY_ATTACK',
}

# Number of times the client retries a failed request (with randomized
# exponential backoff) before the row is recorded as an error.
max_retries = 3

service = discovery.build(
  "commentanalyzer",
  "v1alpha1",
  developerKey=API_KEY,
  discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
  static_discovery=False,
)

start = time.time()

comments_toxicity_list = []
comments_reject_list = []
comments_insult_list = []
comments_identity_list = []

# One result list per output column, in the order the columns are added.
score_lists = {
  'toxicity': comments_toxicity_list,
  'reject': comments_reject_list,
  'insult': comments_insult_list,
  'attack': comments_identity_list,
}
failed_requests = 0

# HERE is the code you need to change. My data name is 'sample_text' and the
# text column name is 'text'. You can change the data and text column names here.
# For example, if your data name is 'df' and the text column name is 'comment',
# the first line of the following loop is supposed to be:
#
#   for comment_text in df.comment.values.tolist():
#
# (and use `df` instead of `sample_text` in the `assign` line after the loop).
# Once you change this code, run this code block.

for comment_text in sample_text.text.values.tolist():
  # Fresh body per comment; the shared `analyze_request` template is left untouched.
  request_body = {
    **analyze_request,
    'comment': {**analyze_request['comment'], 'text': comment_text},
  }

  try:
    # num_retries lets the client retry rate-limited and server-error
    # responses instead of failing on the first one.
    response = service.comments().analyze(body=request_body).execute(num_retries=max_retries)
    attribute_scores = response['attributeScores']
    row_scores = {
      column: attribute_scores[attribute]['summaryScore']['value']
      for column, attribute in score_attributes.items()
    }
  except HttpError:
    # Keep going on a failed request; the row is marked with the string
    # "error" in every score column and counted for the summary below.
    failed_requests += 1
    row_scores = dict.fromkeys(score_attributes, "error")

  for column, value in row_scores.items():
    score_lists[column].append(value)

# assign() attaches each list by row position, so the scores stay next to the
# text they were computed from whatever the frame's index looks like, and it
# overwrites same-named columns, so this cell can be re-run.
sample_text = sample_text.assign(**score_lists)

end=time.time()
if failed_requests:
  print("failed requests: ", failed_requests)
print("complete time: ", round(end -start, 2))

complete time:  105.71


In [None]:
"""
Show the first 10 rows to check that the scores have been computed successfully.
"""
# head(10) shows ten rows; a [0:9] slice stops one row short of that.
sample_text.head(10)

Unnamed: 0,id,case,text,toxicity,reject,insult,attack
496,497,8035,The meeting below has been about 24-30 months ...,error,error,error,error
318,319,5010,My mother grew up attending segregated schools...,error,error,error,error
83,84,1000,Biden and his administration have shown a will...,0.263445,0.293414,0.022771,0.008768


In [None]:
"""
Export the results -- save the data to your Google Drive OR the Colab environment.
You can also find the saved file under the folder icon on the left side and download it.
"""

# Written with a relative file name, i.e. into the current working directory.
# NOTE(review): by default to_csv also writes the DataFrame index as an unnamed
# first column; pass index=False if that column is not wanted -- confirm with
# whoever reads the exported file.
sample_text.to_csv('toxicity.csv')