# Perspective API Access Code

Author: Sandy Liu \
Input: all files in 'output_female' and 'output_male' \
Output: 'male_data_scores.json'; 'female_data_scores.json'

In [7]:
#pip install --upgrade google-api-python-client

In [29]:
from googleapiclient import discovery
import json

In [30]:
# Read the Perspective API key from the PERSPECTIVE_API_KEY environment variable so
# the real credential never has to be saved inside the notebook. The original
# placeholder value "key" is kept as the fallback when the variable is not set.
import os
API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "key")

In [31]:
# Build the Perspective API client ("commentanalyzer" service, version "v1alpha1").
# It is authenticated with API_KEY and is the object every analyze request in this
# notebook goes through.
client = discovery.build(
  "commentanalyzer",
  "v1alpha1",
  developerKey=API_KEY,
  discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
  static_discovery=False,
)

In [32]:
# Smoke test following the Perspective API website tutorial: send one short comment
# and request a score for each of the eight attributes listed below.
analyze_request = {
  'comment': { 'text': 'vote   ' },
  'requestedAttributes': {'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'INSULT': {}, 'SEXUALLY_EXPLICIT': {}, 'PROFANITY': {}, 'LIKELY_TO_REJECT': {},
                         'THREAT': {}, 'IDENTITY_ATTACK': {}}
}
# Executes the request against the API; `response` is the decoded JSON reply.
response = client.comments().analyze(body=analyze_request).execute()

In [33]:
print(json.dumps(response, indent=2))

{
  "attributeScores": {
    "SEXUALLY_EXPLICIT": {
      "spanScores": [
        {
          "begin": 0,
          "end": 7,
          "score": {
            "value": 0.013033954,
            "type": "PROBABILITY"
          }
        }
      ],
      "summaryScore": {
        "value": 0.013033954,
        "type": "PROBABILITY"
      }
    },
    "TOXICITY": {
      "spanScores": [
        {
          "begin": 0,
          "end": 7,
          "score": {
            "value": 0.015707577,
            "type": "PROBABILITY"
          }
        }
      ],
      "summaryScore": {
        "value": 0.015707577,
        "type": "PROBABILITY"
      }
    },
    "SEVERE_TOXICITY": {
      "spanScores": [
        {
          "begin": 0,
          "end": 7,
          "score": {
            "value": 0.0013160706,
            "type": "PROBABILITY"
          }
        }
      ],
      "summaryScore": {
        "value": 0.0013160706,
        "type": "PROBABILITY"
      }
    },
    "IDENTITY_ATTACK": {

In [34]:
# Parse a Perspective API response and keep only the scores, which are what we need.
def retrieveAllScores(data):
    """Map each attribute in a Perspective API response to its summary score.

    Parameters:
        data: response dict with an 'attributeScores' mapping whose values each
            contain ``['summaryScore']['value']``.

    Returns:
        dict of ``{attribute name: summary score value}``, in the order the
        attributes appear in the response.

    Raises:
        KeyError: if 'attributeScores', 'summaryScore' or 'value' is missing.
    """
    return {
        name: payload['summaryScore']['value']
        for name, payload in data['attributeScores'].items()
    }

In [35]:
retrieveAllScores(response)

{'SEXUALLY_EXPLICIT': 0.013033954,
 'TOXICITY': 0.015707577,
 'SEVERE_TOXICITY': 0.0013160706,
 'IDENTITY_ATTACK': 0.0039402717,
 'LIKELY_TO_REJECT': 0.905802,
 'INSULT': 0.008595883,
 'PROFANITY': 0.015454767,
 'THREAT': 0.008440462}

In [36]:
## test on one of our json files
file_path = "/Users/sandyliu/CS315-proj3-group2/1-data_collection/output_female/output_rashidatlaib.json"
# The scraped comments contain emojis and other non-ASCII text, so decode the file
# explicitly as UTF-8 instead of relying on the platform's default encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    file_data = json.load(file)

In [37]:
file_data

[{'id': '7296925230939852078',
  'hashtags': ['ceasefirenow'],
  'description': [''],
  'comments': ["u need to run as independent for president. you'll win",
   'masyallah',
   'Thank you for speaking up',
   '😂😂😂😂',
   'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.❤️I support you!',
   'You are not alone. Palestine 🇵🇸 is not alone! I’m sorry for the evil being shown to the beautiful people of your country. God loves Palestinians 💜',
   'We stand with you congresswoman rashida ✊🏽🇵🇸❤️FREE PALESTINE FOREVER',
   'Antisemite',
   'Good for her!!',
   'We love you Rashida! The American people are behind you. ❤️❤️❤️❤️🇵🇸',
   'Love you Rashida for your activism and being brave to use your voice even in the face of others who oppose you ❤️💯',
   'She is no lovely nothing she is a domestic terrorist!!!!',
   'WE ARE WITH YOU Rep. Tlaib!!! We are with Gaza, we are with Palestine. CEASEFIRE NOW. ❤️🇵🇸❤️',
   'we see your bravery❤️free palestine✊🏼🗣️

In [38]:
## test with the very first video comments
test_data = file_data[0]
print(test_data)


{'id': '7296925230939852078', 'hashtags': ['ceasefirenow'], 'description': [''], 'comments': ["u need to run as independent for president. you'll win", 'masyallah', 'Thank you for speaking up', '😂😂😂😂', 'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.❤️I support you!', 'You are not alone. Palestine 🇵🇸 is not alone! I’m sorry for the evil being shown to the beautiful people of your country. God loves Palestinians 💜', 'We stand with you congresswoman rashida ✊🏽🇵🇸❤️FREE PALESTINE FOREVER', 'Antisemite', 'Good for her!!', 'We love you Rashida! The American people are behind you. ❤️❤️❤️❤️🇵🇸', 'Love you Rashida for your activism and being brave to use your voice even in the face of others who oppose you ❤️💯', 'She is no lovely nothing she is a domestic terrorist!!!!', 'WE ARE WITH YOU Rep. Tlaib!!! We are with Gaza, we are with Palestine. CEASEFIRE NOW. ❤️🇵🇸❤️', 'we see your bravery❤️free palestine✊🏼🗣️', 'we stand with rashida and palestine ❤️🇵🇸',

In [39]:
## clean up the emojis
#pip install emoji
import emoji
def remove_emojis(text):
    """Return ``text`` with every emoji replaced by the empty string.

    Surrounding whitespace is left as-is, so an emoji-only comment becomes ''
    and a comment ending in " <emoji>" keeps its trailing space.
    """
    without_emojis = emoji.replace_emoji(text, replace='')
    return without_emojis

In [40]:
comments = test_data['comments']

In [41]:
comments

["u need to run as independent for president. you'll win",
 'masyallah',
 'Thank you for speaking up',
 '😂😂😂😂',
 'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.❤️I support you!',
 'You are not alone. Palestine 🇵🇸 is not alone! I’m sorry for the evil being shown to the beautiful people of your country. God loves Palestinians 💜',
 'We stand with you congresswoman rashida ✊🏽🇵🇸❤️FREE PALESTINE FOREVER',
 'Antisemite',
 'Good for her!!',
 'We love you Rashida! The American people are behind you. ❤️❤️❤️❤️🇵🇸',
 'Love you Rashida for your activism and being brave to use your voice even in the face of others who oppose you ❤️💯',
 'She is no lovely nothing she is a domestic terrorist!!!!',
 'WE ARE WITH YOU Rep. Tlaib!!! We are with Gaza, we are with Palestine. CEASEFIRE NOW. ❤️🇵🇸❤️',
 'we see your bravery❤️free palestine✊🏼🗣️',
 'we stand with rashida and palestine ❤️🇵🇸',
 'PLEASE endorse a candidate SOON so they have a fighting chance! We cannot sp

In [42]:
# Try the emoji cleaning on the sample comments: every entry of the existing list
# is replaced in place by its emoji-free version, then the list is displayed.
comments[:] = [remove_emojis(raw_comment) for raw_comment in comments]
comments

["u need to run as independent for president. you'll win",
 'masyallah',
 'Thank you for speaking up',
 '',
 'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.I support you!',
 'You are not alone. Palestine  is not alone! I’m sorry for the evil being shown to the beautiful people of your country. God loves Palestinians ',
 'We stand with you congresswoman rashida FREE PALESTINE FOREVER',
 'Antisemite',
 'Good for her!!',
 'We love you Rashida! The American people are behind you. ',
 'Love you Rashida for your activism and being brave to use your voice even in the face of others who oppose you ',
 'She is no lovely nothing she is a domestic terrorist!!!!',
 'WE ARE WITH YOU Rep. Tlaib!!! We are with Gaza, we are with Palestine. CEASEFIRE NOW. ',
 'we see your braveryfree palestine',
 'we stand with rashida and palestine ',
 'PLEASE endorse a candidate SOON so they have a fighting chance! We cannot split our votes between West, Stein, and de la

In [43]:
# Attributes requested from the Perspective API for every comment. Each attribute
# name maps to its own empty options dict.
attributes = {
    attribute_name: {}
    for attribute_name in (
        'TOXICITY',
        'SEVERE_TOXICITY',
        'INSULT',
        'SEXUALLY_EXPLICIT',
        'PROFANITY',
        'LIKELY_TO_REJECT',
        'THREAT',
        'IDENTITY_ATTACK',
    )
}

In [44]:
# Send one comment to the Perspective API and return its scores.
def makePerspectiveRequest(comment, languages=None):
    """Score a single comment on every attribute in the module-level ``attributes``.

    Parameters:
        comment: the comment text to analyze.
        languages: optional language code (e.g. ``"en"``) or iterable of codes to
            put in the request's ``languages`` field. When omitted (the default)
            the field is not sent, exactly as before, and the API decides the
            language itself.
            NOTE(review): many short comments fail in the logged runs, presumably
            because of that language auto-detection -- confirm, and consider
            passing ``languages=["en"]`` for this dataset.

    Returns:
        ``{comment: {attribute: summary score}}`` -- a one-entry dict keyed by the
        comment text.

    Raises:
        Whatever ``client.comments().analyze(...).execute()`` raises when the API
        rejects the request; callers are expected to handle it.
    """
    analyze_request = {
        'comment': {'text': comment},
        'requestedAttributes': attributes,
    }
    if languages:
        # Accept a bare code such as "en" as well as a list/tuple of codes.
        if isinstance(languages, str):
            analyze_request['languages'] = [languages]
        else:
            analyze_request['languages'] = list(languages)

    response = client.comments().analyze(body=analyze_request).execute()
    return {comment: retrieveAllScores(response)}

In [45]:
makePerspectiveRequest(comments[0])

{"u need to run as independent for president. you'll win": {'THREAT': 0.008582866,
  'SEXUALLY_EXPLICIT': 0.0064285113,
  'TOXICITY': 0.030859824,
  'LIKELY_TO_REJECT': 0.6858806,
  'SEVERE_TOXICITY': 0.0012969971,
  'PROFANITY': 0.0138151245,
  'IDENTITY_ATTACK': 0.00569767,
  'INSULT': 0.012110904}}

In [None]:
# Take a list of comments and use the previously defined functions to get their scores.
import time

def getCommentListPerspective(comment_list, delay=1):
    """Score every non-blank comment in ``comment_list`` with the Perspective API.

    Parameters:
        comment_list: iterable of comment strings (typically already emoji-free).
        delay: seconds to wait before each API request (default 1, the value
            that used to be hard-coded).

    Returns:
        list of ``{comment: {attribute: score}}`` dicts, one per comment that was
        scored successfully, in input order. Comments that are blank or whose
        request fails are left out.
    """
    all_scores = []
    for comment in comment_list:
        # Empty / whitespace-only comments (e.g. emoji-only ones after cleaning)
        # have nothing to score, so skip them before paying for the delay or a
        # request that can only fail.
        if not isinstance(comment, str) or not comment.strip():
            continue

        time.sleep(delay)
        try:
            all_scores.append(makePerspectiveRequest(comment))
        except Exception as error:
            # `except Exception` (not a bare except) lets KeyboardInterrupt stop a
            # long run. Only the exception type and, when present, its `reason`
            # attribute are printed: the full text of an HTTP error may include
            # the request URL and therefore the API key.
            detail = type(error).__name__
            reason = getattr(error, 'reason', None)
            if reason:
                detail = f"{detail}: {reason}"
            print(f"current comment {comment} does not support makePerspectiveRequest ({detail})")
    return all_scores

In [47]:
comments

["u need to run as independent for president. you'll win",
 'masyallah',
 'Thank you for speaking up',
 '',
 'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.I support you!',
 'You are not alone. Palestine  is not alone! I’m sorry for the evil being shown to the beautiful people of your country. God loves Palestinians ',
 'We stand with you congresswoman rashida FREE PALESTINE FOREVER',
 'Antisemite',
 'Good for her!!',
 'We love you Rashida! The American people are behind you. ',
 'Love you Rashida for your activism and being brave to use your voice even in the face of others who oppose you ',
 'She is no lovely nothing she is a domestic terrorist!!!!',
 'WE ARE WITH YOU Rep. Tlaib!!! We are with Gaza, we are with Palestine. CEASEFIRE NOW. ',
 'we see your braveryfree palestine',
 'we stand with rashida and palestine ',
 'PLEASE endorse a candidate SOON so they have a fighting chance! We cannot split our votes between West, Stein, and de la

In [48]:
getCommentListPerspective(comments)

current comment masyallah does not support makePerspectiveRequest
current comment Antisemite does not support makePerspectiveRequest


[{"u need to run as independent for president. you'll win": {'SEXUALLY_EXPLICIT': 0.0064285113,
   'INSULT': 0.012110904,
   'TOXICITY': 0.030859824,
   'SEVERE_TOXICITY': 0.0012969971,
   'IDENTITY_ATTACK': 0.00569767,
   'LIKELY_TO_REJECT': 0.68588054,
   'PROFANITY': 0.0138151245,
   'THREAT': 0.008582866}},
 {'Thank you for speaking up': {'PROFANITY': 0.015967157,
   'INSULT': 0.010362893,
   'IDENTITY_ATTACK': 0.0045322375,
   'LIKELY_TO_REJECT': 0.6653153,
   'SEVERE_TOXICITY': 0.0013828278,
   'THREAT': 0.007883795,
   'TOXICITY': 0.019477395,
   'SEXUALLY_EXPLICIT': 0.010026119}},
 {'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.I support you!': {'SEXUALLY_EXPLICIT': 0.005101525,
   'SEVERE_TOXICITY': 0.00077724457,
   'THREAT': 0.0072235605,
   'TOXICITY': 0.011560776,
   'PROFANITY': 0.0118338885,
   'INSULT': 0.0073798755,
   'LIKELY_TO_REJECT': 0.21775407,
   'IDENTITY_ATTACK': 0.003884775}},
 {'You are not alone. Palestine  is

In [49]:
test_dic = getCommentListPerspective(comments)

current comment masyallah does not support makePerspectiveRequest
current comment Antisemite does not support makePerspectiveRequest


In [50]:
test_dic

[{"u need to run as independent for president. you'll win": {'SEXUALLY_EXPLICIT': 0.0064285113,
   'PROFANITY': 0.0138151245,
   'LIKELY_TO_REJECT': 0.6858806,
   'TOXICITY': 0.030859824,
   'SEVERE_TOXICITY': 0.0012969971,
   'INSULT': 0.012110904,
   'IDENTITY_ATTACK': 0.00569767,
   'THREAT': 0.008582866}},
 {'Thank you for speaking up': {'SEXUALLY_EXPLICIT': 0.010026119,
   'THREAT': 0.007883795,
   'IDENTITY_ATTACK': 0.0045322375,
   'PROFANITY': 0.015967157,
   'SEVERE_TOXICITY': 0.0013828278,
   'TOXICITY': 0.019477395,
   'LIKELY_TO_REJECT': 0.6653153,
   'INSULT': 0.010362893}},
 {'Rashida, thank you for being a voice for this cause through the pain I’m sure you’re feeling.I support you!': {'LIKELY_TO_REJECT': 0.21775414,
   'PROFANITY': 0.0118338885,
   'INSULT': 0.0073798755,
   'SEVERE_TOXICITY': 0.00077724457,
   'IDENTITY_ATTACK': 0.003884775,
   'THREAT': 0.0072235605,
   'SEXUALLY_EXPLICIT': 0.005101525,
   'TOXICITY': 0.011560776}},
 {'You are not alone. Palestine  is 

In [54]:
# Average the scores of one video over all of its scored comments.
def getAverageScoreOfOneVideo(video_dic):
    """Return the mean score per attribute for one video.

    Parameters:
        video_dic: list of ``{comment: {attribute: score}}`` dicts.

    Returns:
        ``{attribute: sum of its scores / number of scored comments}``. The
        divisor is the total number of score dicts seen, even for an attribute
        that is missing from some of them. An empty input gives ``{}``.
    """
    totals = {}
    comment_count = 0

    for per_comment in video_dic:
        for attribute_scores in per_comment.values():
            comment_count += 1
            for attribute, score in attribute_scores.items():
                try:
                    totals[attribute] += score
                except KeyError:
                    # First time this attribute is seen.
                    totals[attribute] = score

    averages = {}
    for attribute, total in totals.items():
        averages[attribute] = total / comment_count
    return averages

In [69]:
getAverageScoreOfOneVideo(test_dic)

{'SEXUALLY_EXPLICIT': 0.016290276763157894,
 'TOXICITY': 0.14881393968421053,
 'INSULT': 0.07697251342105263,
 'SEVERE_TOXICITY': 0.01573523205105263,
 'PROFANITY': 0.05255150315789475,
 'THREAT': 0.03845169683684211,
 'LIKELY_TO_REJECT': 0.7451300752631579,
 'IDENTITY_ATTACK': 0.09252761835789473}

In [55]:
#testing on one account
file_path = "/Users/sandyliu/CS315-proj3-group2/1-data_collection/output_female/output_rashidatlaib.json"

In [56]:
# Score every video of one account file.
def getAllVideosAverageOneAccount(file_path):
    """Return the average Perspective scores of each video in one account file.

    Parameters:
        file_path: path to a JSON file holding a list of video dicts, each with
            an 'id' and a 'comments' list.

    Returns:
        ``{video_id: {attribute: average score}}`` for every video that ended up
        with at least one scored comment. Videos with none are reported with a
        printed message and left out.
    """
    # The comments contain emojis and other non-ASCII text; decode explicitly as
    # UTF-8 rather than with the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        file_data = json.load(file)

    output = {}
    for video in file_data:
        video_id = video['id']
        # Build a cleaned copy instead of overwriting the loaded comments in place.
        video_comments = [remove_emojis(comment) for comment in video['comments']]
        comment_scores = getCommentListPerspective(video_comments)
        video_score_average = getAverageScoreOfOneVideo(comment_scores)
        if video_score_average:
            output[video_id] = video_score_average
        else:
            # Also reached when the video had comments but none could be scored.
            print(f"current video: {video_id} has no comment")
    return output

In [53]:
getAllVideosAverageOneAccount(file_path)

current comment masyallah does not support makePerspectiveRequest
current comment Antisemite does not support makePerspectiveRequest


NameError: name 'getAverageScoreOfOneVideo' is not defined

In [65]:
#get all female accounts data
import os
import json

def getAllFemaleAccountData(
    directory_path="/Users/sandyliu/CS315-proj3-group2/1-data_collection/output_female/",
    output_file_path="female_data_scores.json",
):
    """Score every female account file and save the results as JSON.

    Parameters:
        directory_path: folder with one ``output_<account>.json`` file per
            account (defaults to the original hard-coded location).
        output_file_path: where the combined scores are written (defaults to
            'female_data_scores.json').

    Returns:
        ``{account name: {video_id: {attribute: average score}}}`` -- the same
        dict that is written to ``output_file_path``.
    """
    all_female = {}
    # Sorted so the order of accounts in the output file is reproducible.
    for filename in sorted(os.listdir(directory_path)):
        stem, extension = os.path.splitext(filename)
        # Ignore anything that is not an account file (.DS_Store, checkpoint
        # folders, ...); such names used to raise an IndexError below.
        if extension.lower() != ".json":
            continue
        # Drop only the leading "output_" style prefix so account names that
        # themselves contain '_' or '.' are kept whole.
        account_name = stem.split('_', 1)[1] if '_' in stem else stem
        file_path = os.path.join(directory_path, filename)
        all_female[account_name] = getAllVideosAverageOneAccount(file_path)

    # Write the dictionary to a file in JSON format
    with open(output_file_path, 'w') as json_file:
        json.dump(all_female, json_file, indent=4)
    return all_female

In [66]:
female_result = getAllFemaleAccountData()

current comment 100%!!!!!!!!! does not support makePerspectiveRequest
current comment @ does not support makePerspectiveRequest
current comment @ does not support makePerspectiveRequest
current comment  Droppppp does not support makePerspectiveRequest
current comment Muchas gracias  does not support makePerspectiveRequest
current comment Bravo madame does not support makePerspectiveRequest
current comment 100% does not support makePerspectiveRequest
current comment   does not support makePerspectiveRequest
current comment Viva Mexico  does not support makePerspectiveRequest
current comment Don’t ban tik tok!!! does not support makePerspectiveRequest
current comment Get em!! Vote  does not support makePerspectiveRequest
current comment hoorah does not support makePerspectiveRequest
current comment Yaas queen does not support makePerspectiveRequest
current comment BIDEM HARRIS 2024!! does not support makePerspectiveRequest
current comment chef's kiss! does not support makePerspectiveRequ

In [67]:
# get all male accounts data
import os
import json

def getAllMaleAccountData(
    directory_path="/Users/sandyliu/CS315-proj3-group2/1-data_collection/output_male/",
    output_file_path="male_data_scores.json",
):
    """Score every male account file and save the results as JSON.

    Parameters:
        directory_path: folder with one ``output_<account>.json`` file per
            account (defaults to the original hard-coded location).
        output_file_path: where the combined scores are written (defaults to
            'male_data_scores.json').

    Returns:
        ``{account name: {video_id: {attribute: average score}}}`` -- the same
        dict that is written to ``output_file_path``.
    """
    all_male = {}
    # Sorted so the order of accounts in the output file is reproducible.
    for filename in sorted(os.listdir(directory_path)):
        stem, extension = os.path.splitext(filename)
        # Ignore anything that is not an account file (.DS_Store, checkpoint
        # folders, ...); such names used to raise an IndexError below.
        if extension.lower() != ".json":
            continue
        # Drop only the leading "output_" style prefix so account names that
        # themselves contain '_' or '.' are kept whole.
        account_name = stem.split('_', 1)[1] if '_' in stem else stem
        file_path = os.path.join(directory_path, filename)
        all_male[account_name] = getAllVideosAverageOneAccount(file_path)

    # Write the dictionary to a file in JSON format
    with open(output_file_path, 'w') as json_file:
        json.dump(all_male, json_file, indent=4)
    return all_male

In [68]:
getAllMaleAccountData()

current video: 7355557230193773866 has no comment
current video: 7336669445185637674 has no comment
current video: 7162528866681031979 has no comment
current video: 7157467423510154542 has no comment
current video: 7152204323705883950 has no comment
current comment fyp does not support makePerspectiveRequest
current comment ughhhhhhhhhhhhhhhh does not support makePerspectiveRequest
current comment Yess does not support makePerspectiveRequest
current comment Whooo !! does not support makePerspectiveRequest
current comment Protect TikTok does not support makePerspectiveRequest
current comment 3.14157??? does not support makePerspectiveRequest
current comment Hahahaha! does not support makePerspectiveRequest
current comment Hahahahaha does not support makePerspectiveRequest
current comment Don’t ban TikTok does not support makePerspectiveRequest
current comment Don’t ban TikTok does not support makePerspectiveRequest
current comment True dat does not support makePerspectiveRequest
current