<a href="https://colab.research.google.com/github/victorknox/RedditAnalysis/blob/main/Reddit_MoralFoundations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing data

In [1]:
# Importing the Reddit Comments dataset
import pandas as pd
data = pd.read_csv('Reddit_comments.csv', names=['title', 'body', 'votes', 'subreddit name', 'time', 'date', 'author'])
print(data.head())

                   title  ...         author
0                  title  ...         author
1  Twice - Momo deepfake  ...  beef_smuggler
2  Twice - Momo Deepfake  ...        zamin13
3  Twice - Momo Deepfake  ...       CodaAyax
4  Twice - Momo Deepfake  ...         Emoome

[5 rows x 7 columns]


In [2]:
# Importing the Moral Foundations Dictionary
import pandas as pd
MFD = pd.read_csv('MFD.csv')
print(MFD.head())

         Word  tag1  tag2  tag3
0        safe     1   NaN   NaN
1       peace     1   NaN   NaN
2  compassion     1   NaN   NaN
3      empath     1   NaN   NaN
4     sympath     1   NaN   NaN


In [3]:
comments = data['body']
posts = data['title'].unique()

print(len(comments))
print(len(posts))

86325
6834


# Moral Sentiment detection

	1                    HarmVirtue
    2                    HarmVice
    3                    FairnessVirtue
    4                    FairnessVice
    5                    IngroupVirtue
    6                    IngroupVice
    7                    AuthorityVirtue
    8                    AuthorityVice
    9                    PurityVirtue
    10                    PurityVice


### Function to detect morality sentiments




In [4]:
def morality_function(body, x, y, MFD):
  moral_sentiments_total = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
  for post in body[x:y]:
      moral_sentiments = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
      for i in range(len(MFD['Word'])):
        if MFD.iloc[i].Word in str(post):
          if MFD.iloc[i].tag1 <= 10: 
            moral_sentiments[int(MFD.iloc[i].tag1-1)] += 1
          if MFD.iloc[i].tag2 <= 10:
            moral_sentiments[int(MFD.iloc[i].tag2-1)] += 1
          if MFD.iloc[i].tag3 <= 10:
            moral_sentiments[int(MFD.iloc[i].tag3-1)] += 1
      if max(moral_sentiments) > 0:
        moral_sentiments_total[moral_sentiments.index(max(moral_sentiments))] += 1
  return moral_sentiments_total

## Analyzing Morality change over the years


In [5]:
# for posts
morality_posts = []
morality_posts.append(morality_function(posts, 0, 269, MFD))
morality_posts.append(morality_function(posts, 269,(269+1472), MFD))
morality_posts.append(morality_function(posts, (269+1472), (269+1472+2886), MFD))
morality_posts.append(morality_function(posts, (269+1472+2886),(6834), MFD))

print(morality_posts)

[[4, 9, 1, 0, 8, 0, 6, 0, 2, 9], [15, 68, 3, 0, 62, 1, 23, 0, 0, 58], [15, 115, 3, 2, 91, 6, 55, 4, 7, 127], [13, 73, 11, 2, 87, 6, 54, 0, 6, 115]]


In [6]:
# for comments
morality_comments = []
morality_comments.append(morality_function(comments, 0, 3828, MFD))
morality_comments.append(morality_function(comments, 3828,(3828+22773), MFD))
morality_comments.append(morality_function(comments, (3828+22773), (3828+22773+34071), MFD))
morality_comments.append(morality_function(comments, (3828+22773+34071), 86325, MFD))

print(morality_comments)

[[140, 279, 102, 14, 516, 15, 193, 6, 15, 144], [617, 1360, 328, 73, 2895, 66, 708, 26, 111, 715], [828, 1982, 613, 115, 4884, 81, 1119, 53, 184, 1076], [546, 1200, 344, 39, 3239, 73, 848, 40, 104, 833]]


In [7]:
morality_comments_total = [sum(x) for x in zip(*morality_comments)]
morality_posts_total = [sum(x) for x in zip(*morality_posts)]

print(morality_comments_total, morality_posts_total)

[2131, 4821, 1387, 241, 11534, 235, 2868, 125, 414, 2768] [47, 265, 18, 4, 248, 13, 138, 4, 15, 309]


In [8]:
print("Number of comments with no morality sentiment: " + str(len(comments) - sum(morality_comments_total)) + "/" + str(len(comments)) )
print("Number of posts with no morality sentiment: " + str(len(posts) - sum(morality_posts_total)) + "/" + str(len(posts)))

Number of comments with no morality sentiment: 59801/86325
Number of posts with no morality sentiment: 5773/6834


In [9]:
print("posts: ")
for i in range(4):
  print(str(2018+i) + ": " + str(morality_posts[i]))
print("total of posts:" + str(morality_posts_total) )

print()

print("comments: ")
for i in range(4):
  print(str(2018+i) + ": " + str(morality_comments[i]) )
print("total of comments:" + str(morality_comments_total) )

posts: 
2018: [4, 9, 1, 0, 8, 0, 6, 0, 2, 9]
2019: [15, 68, 3, 0, 62, 1, 23, 0, 0, 58]
2020: [15, 115, 3, 2, 91, 6, 55, 4, 7, 127]
2021: [13, 73, 11, 2, 87, 6, 54, 0, 6, 115]
total of posts:[47, 265, 18, 4, 248, 13, 138, 4, 15, 309]

comments: 
2018: [140, 279, 102, 14, 516, 15, 193, 6, 15, 144]
2019: [617, 1360, 328, 73, 2895, 66, 708, 26, 111, 715]
2020: [828, 1982, 613, 115, 4884, 81, 1119, 53, 184, 1076]
2021: [546, 1200, 344, 39, 3239, 73, 848, 40, 104, 833]
total of comments:[2131, 4821, 1387, 241, 11534, 235, 2868, 125, 414, 2768]


## Textual analysis and Classification

#### Function to detect morality and classify posts

In [None]:
def morality_classifier(body, x, y, MFD):
  moral_sentiments_body = [[], [], [], [], [], [], [], [], [], []]
  for post in body[x:y]:
      moral_sentiments = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
      for i in range(len(MFD['Word'])):
        if MFD.iloc[i].Word in str(post):
          if MFD.iloc[i].tag1 <= 10: 
            moral_sentiments[int(MFD.iloc[i].tag1-1)] += 1
          if MFD.iloc[i].tag2 <= 10:
            moral_sentiments[int(MFD.iloc[i].tag2-1)] += 1
          if MFD.iloc[i].tag3 <= 10:
            moral_sentiments[int(MFD.iloc[i].tag3-1)] += 1
      if max(moral_sentiments) > 0:
        moral_sentiments_body[moral_sentiments.index(max(moral_sentiments))].append(post)
        print()
  return moral_sentiments_body

#### Collecting the data

In [None]:
moral_sentiment_index = ['HarmVirtue', 'HarmVice', 'FairnessVirtue', 'FairnessVirtue', 'IngroupVirtue', 'IngroupVice', 'AuthorityVirtue','AuthorityVice', 'PurityVirtue', 'PurityVice']

In [None]:
# for posts
morality_posts = []
morality_posts.append(morality_classifier(posts, 0, 269, MFD))
morality_posts.append(morality_classifier(posts, 269,(269+1472), MFD))
morality_posts.append(morality_classifier(posts, (269+1472), (269+1472+2886), MFD))
morality_posts.append(morality_classifier(posts, (269+1472+2886),(6834), MFD))

In [None]:
import csv  

header = ['body','moral sentiment', 'year']

with open('moral_sentiment_posts.csv', 'w', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    for year in range(len(morality_posts)):
      for i in range(10):
        for post in morality_posts[year][i][0:5]:
          data = [post, moral_sentiment_index[i], 2018 + year]
          writer.writerow(data)

In [None]:
# for comments
morality_comments = []
morality_comments.append(morality_classifier(comments, 0, 3828, MFD))
morality_comments.append(morality_classifier(comments, 3828,(3828+22773), MFD))
morality_comments.append(morality_classifier(comments, (3828+22773), (3828+22773+34071), MFD))
morality_comments.append(morality_classifier(comments, (3828+22773+34071), 86325, MFD))

In [None]:
import csv  

header = ['body','moral sentiment', 'year']

with open('moral_sentiment_comments.csv', 'w', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    for year in range(len(morality_comments)):
      for i in range(10):
        for post in morality_comments[year][i][0:5]:
          data = [post, moral_sentiment_index[i], 2018 + year]
          writer.writerow(data)

#### Function to get top 10 morality words in each dimension

In [34]:
def morality_frequency(body, x, y, MFD):
  MD = {} # moral dictionary to store frequencies
  for i in range(len(MFD['Word'])):
    MD[MFD.iloc[i].Word] = [ 0, MFD.iloc[i].tag1 - 1 ]
  top10 = [[], [], [], [], [], [], [], [], [], []]
  for post in body[x:y]:
      for i in range(len(MFD['Word'])):
        if MFD.iloc[i].Word in str(post):
          MD[MFD.iloc[i].Word][0] += 1
  for w in sorted(MD, key=MD.get, reverse=True):
    # print(w, MD[w])
    if MD[w][1] < 10 and len(top10[MD[w][1]]) < 10 and MD[w][0] > 0:
      top10[MD[w][1]].append(w) 
  return top10

#### Obtaining top 10 words from each year 

In [39]:
moral_sentiment_index = ['HarmVirtue', 'HarmVice', 'FairnessVirtue', 'FairnessVirtue', 'IngroupVirtue', 'IngroupVice', 'AuthorityVirtue','AuthorityVice', 'PurityVirtue', 'PurityVice']
def top10(x):
  for i in range(10):
    print(moral_sentiment_index[i], end = " : " )
    for word in x[i]:
      print(word, end = ", ")
    print()

In [42]:
print("Top 10 words category wise in Posts: ")
print()
print(2018)
top10(morality_frequency(posts, 0, 269, MFD))
print()
print(2019)
top10(morality_frequency(posts, 269,(269+1472), MFD))
print()
print(2020)
top10(morality_frequency(posts, (269+1472), (269+1472+2886), MFD))
print()
print(2021)
top10(morality_frequency(posts, (269+1472+2886),(6834), MFD))

Top 10 words category wise in Posts: 

2018
HarmVirtue : secur, protect, defen, 
HarmVice : war, kill, 
FairnessVirtue : fair, 
FairnessVirtue : 
IngroupVirtue : ally, family, communit, member, 
IngroupVice : terroris, 
AuthorityVirtue : legal, law, order, mother, leader, 
AuthorityVice : illegal, 
PurityVirtue : wholesome, decen, 
PurityVice : sin, filth, pervert, 

2019
HarmVirtue : care, secur, defen, protect, safe, 
HarmVice : war, kill, fight, wars, kills, abuse, ruin, crush, destroy, damag, 
FairnessVirtue : fair, fairly, justifi, honest, 
FairnessVirtue : 
IngroupVirtue : ally, nation, together, group, family, communit, member, 
IngroupVice : individual, 
AuthorityVirtue : law, legal, obey, control, father, mother, status, leader, class, order, 
AuthorityVice : dissent, lawless, 
PurityVirtue : sacred, innocent, 
PurityVice : sin, lax, 

2020
HarmVirtue : protect, care, benefit, secur, safe, caring, guard, 
HarmVice : war, kill, fight, attack, killing, crush, harm, kills, ruin, 

In [43]:
print("Top 10 words category wise in Comments: ")
print()
print(2018)
top10(morality_frequency(comments, 0, 269, MFD))
print()
print(2019)
top10(morality_frequency(comments, 269,(269+1472), MFD))
print()
print(2020)
top10(morality_frequency(comments, (269+1472), (269+1472+2886), MFD))
print()
print(2021)
top10(morality_frequency(comments, (269+1472+2886),(6834), MFD))

Top 10 words category wise in Comments: 

2018
HarmVirtue : care, protect, defen, 
HarmVice : war, fight, ruin, 
FairnessVirtue : fair, honest, fairly, rights, 
FairnessVirtue : 
IngroupVirtue : ally, nation, member, loyal, 
IngroupVice : 
AuthorityVirtue : law, legal, submi, order, tradition, class, command, 
AuthorityVice : oppose, 
PurityVirtue : holy, 
PurityVice : sin, lax, 

2019
HarmVirtue : care, protect, defen, benefit, safe, empath, secur, peace, sympath, caring, 
HarmVice : war, harm, damag, kill, hurt, ruin, destroy, suffer, violen, fight, 
FairnessVirtue : fair, honest, rights, fairly, equal, justifi, reasonable, equivalent, constant, balance, 
FairnessVirtue : unfair, bigot, discriminat, disproportion, exclud, 
IngroupVirtue : ally, member, nation, communit, group, together, family, communis, unite, fellow, 
IngroupVice : individual, deceiv, foreign, immigra, betray, terroris, 
AuthorityVirtue : legal, law, submi, respect, order, control, position, serve, authorit, class,

## Graph Representation of the data

- Posts: https://jsfiddle.net/r2fpqz1u/1/
- Comments: https://jsfiddle.net/3x9fk64j/

In [None]:
print("posts: ")
for i in range(10):
  print("[", end = "")
  for post in morality_posts:
    if i%2 == 0:
      print(post[i], end = ",")
    else: 
      print(-post[i], end = ",")
  print("]")

posts: 
[4,15,15,13,]
[-9,-68,-115,-73,]
[1,3,3,11,]
[0,0,-2,-2,]
[8,62,91,87,]
[0,-1,-6,-6,]
[6,23,55,54,]
[0,0,-4,0,]
[2,0,7,6,]
[-9,-58,-127,-115,]


In [18]:
print("Comments: ")
for i in range(10):
  print("[", end = "")
  for post in morality_comments:
    if i%2 == 0:
      print(post[i], end = ",")
    else: 
      print(-post[i], end = ",")
  print("]")

Comments: 
[140,617,828,546,]
[-279,-1360,-1982,-1200,]
[102,328,613,344,]
[-14,-73,-115,-39,]
[516,2895,4884,3239,]
[-15,-66,-81,-73,]
[193,708,1119,848,]
[-6,-26,-53,-40,]
[15,111,184,104,]
[-144,-715,-1076,-833,]
