### This notebook contains helper functions for handling REST API requests: querying the comments dataset and returning the results as JSON

In [1]:
# Path to the comments CSV, relative to the directory the notebook runs from.
dataset = '../data/comments.csv'

In [10]:
import pandas as pd
import json

In [3]:
# Load the comments CSV into a DataFrame.
df = pd.read_csv(dataset)

In [19]:
# Preview: take the first 25 rows, then order that slice by created_at (ascending).
df.head(25).sort_values(by=['created_at'])

Unnamed: 0,id,username,text,created_at,subfeddit_id,subfeddit_name,sentiment_class,sentiment_score
24,25,user_24,Love it. It looks great!,1711362847,1,Dummy Topic 1,positive,0.8622
23,24,user_23,It looks great! Enjoy!,1711366447,1,Dummy Topic 1,positive,0.8353
22,23,user_22,It looks great! Proud of you.,1711370047,1,Dummy Topic 1,positive,0.8172
21,22,user_21,It looks great! Luckily you did it.,1711373647,1,Dummy Topic 1,positive,0.8268
20,21,user_20,It looks great! Good work.,1711377247,1,Dummy Topic 1,positive,0.807
19,20,user_19,It looks great! Like it a lot!,1711380847,1,Dummy Topic 1,positive,0.8011
18,19,user_18,It looks great! Thumbs up.,1711384447,1,Dummy Topic 1,positive,0.6588
17,18,user_17,It looks great! What you did was right.,1711388047,1,Dummy Topic 1,positive,0.6588
16,17,user_16,It looks great! Looks decent.,1711391647,1,Dummy Topic 1,positive,0.6588
15,16,user_15,It looks great! Well done!,1711395247,1,Dummy Topic 1,positive,0.7772


In [5]:
# Draw a reproducible 150-row sample (fixed random_state).
# NOTE(review): weights=df['subfeddit_id'] makes each row's selection
# probability proportional to its subfeddit id, so higher-numbered subfeddits
# are over-represented. Confirm this is intended rather than a uniform sample.
df_sample = df.sample(n=150, weights=df['subfeddit_id'], random_state=42)

In [6]:
# Persist the sample under ../tests. With the default index=True, to_csv also
# writes the DataFrame index as an unnamed first column.
df_sample.to_csv('../tests/dataset_sample.csv')

In [7]:
# Check row count, column dtypes and non-null counts of the sample.
df_sample.info()

<class 'pandas.core.frame.DataFrame'>
Index: 150 entries, 54764 to 45061
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               150 non-null    int64  
 1   username         150 non-null    object 
 2   text             150 non-null    object 
 3   created_at       150 non-null    int64  
 4   subfeddit_id     150 non-null    int64  
 5   subfeddit_name   150 non-null    object 
 6   sentiment_class  150 non-null    object 
 7   sentiment_score  150 non-null    float64
dtypes: float64(1), int64(3), object(4)
memory usage: 10.5+ KB


In [28]:
class NoSubfeddit(Exception):
    """Raised when a query specifies neither a subfeddit id nor a subfeddit name."""


def get_comments_from_dataset(
    df,
    subfeddit_id=None,
    subfeddit_name=None,
    limit=25,
    start_date=None,
    end_date=None,
    sorted_by_polarity=False,
    polarity_sort='asc',
):
    """Return comments of one subfeddit as a JSON string of records.

    Parameters
    ----------
    df : pandas.DataFrame
        Comments table. Must contain the columns ``id``, ``text``,
        ``created_at``, ``subfeddit_id``, ``subfeddit_name``,
        ``sentiment_class`` and ``sentiment_score``.
    subfeddit_id, subfeddit_name : optional
        Which subfeddit to select. At least one is required; when both are
        given, ``subfeddit_name`` takes precedence and ``subfeddit_id`` is
        ignored.
    limit : int or None
        Maximum number of comments to return (applied after sorting).
        ``None`` returns every matching comment.
    start_date, end_date : optional
        Inclusive bounds compared directly against ``created_at``, so they
        must use the same representation as that column. Each bound is
        applied independently; either one may be omitted.
    sorted_by_polarity : bool
        When false (default), comments are ordered by ``created_at``,
        most recent first. When true, they are ordered by
        ``sentiment_score`` instead.
    polarity_sort : str
        Only used when ``sorted_by_polarity`` is true. ``'asc'`` sorts by
        ascending score; any other value sorts descending.

    Returns
    -------
    str
        JSON array (``orient='records'``) whose objects have the keys
        ``id``, ``text``, ``sentiment_class`` and ``sentiment_score``.
        An empty selection yields ``'[]'``.

    Raises
    ------
    NoSubfeddit
        If both ``subfeddit_id`` and ``subfeddit_name`` are ``None``.
    """
    output_columns = ['id', 'text', 'sentiment_class', 'sentiment_score']

    if subfeddit_id is None and subfeddit_name is None:
        raise NoSubfeddit('No Subfeddit Provided')

    # The name wins over the id when both are supplied.
    if subfeddit_name is not None:
        selected = df[df['subfeddit_name'] == subfeddit_name]
    else:
        selected = df[df['subfeddit_id'] == subfeddit_id]

    # Apply each bound on its own so that an open-ended range (only a start
    # or only an end) is honoured instead of being silently ignored.
    if start_date is not None:
        selected = selected[selected['created_at'] >= start_date]
    if end_date is not None:
        selected = selected[selected['created_at'] <= end_date]

    if sorted_by_polarity:
        sort_column = 'sentiment_score'
        ascending = polarity_sort == 'asc'
    else:
        # Default view: most recent comments first.
        sort_column = 'created_at'
        ascending = False

    # A stable sort keeps rows with equal keys in their incoming order, so
    # the truncated result is deterministic even when scores tie.
    ordered = selected.sort_values(by=[sort_column], ascending=ascending, kind='mergesort')
    return ordered[:limit][output_columns].to_json(orient='records')

In [33]:
# Try the function on the sample: comments of subfeddit 1 ordered by
# descending sentiment score, using the default limit.
resp_json = get_comments_from_dataset(df_sample, subfeddit_id=1, sorted_by_polarity=True, polarity_sort='desc')
# Parse the returned JSON string back into a DataFrame to view it as a table.
pd.DataFrame(json.loads(resp_json))

Unnamed: 0,id,text,sentiment_class,sentiment_score
0,6361,Awesome. Like it a lot! Love it. It looks great!,positive,0.9476
1,6960,Awesome. Enjoy! Awesome. Enjoy!,positive,0.9449
2,7466,Well done! Awesome. Luckily you did it. Love it.,positive,0.9324
3,22273,Enjoy! Luckily you did it. Like it a lot! It l...,positive,0.9322
4,4166,Love it. Well done! Proud of you. Love it.,positive,0.9312
5,5145,Love it. Proud of you. Like it a lot! Good work.,positive,0.9184
6,15580,Like it a lot! Enjoy! Love it. Well done!,positive,0.9165
7,15087,Like it a lot! Like it a lot! Good work. Awesome.,positive,0.9115
8,19766,Proud of you. Looks decent. Awesome. Love it.,positive,0.9081
9,10417,Looks decent. Enjoy! Well done! It looks great!,positive,0.8827


In [34]:
# Show the raw JSON string exactly as the function returned it.
resp_json

'[{"id":6361,"text":"Awesome. Like it a lot! Love it. It looks great!","sentiment_class":"positive","sentiment_score":0.9476},{"id":6960,"text":"Awesome. Enjoy! Awesome. Enjoy!","sentiment_class":"positive","sentiment_score":0.9449},{"id":7466,"text":"Well done! Awesome. Luckily you did it. Love it.","sentiment_class":"positive","sentiment_score":0.9324},{"id":22273,"text":"Enjoy! Luckily you did it. Like it a lot! It looks great!","sentiment_class":"positive","sentiment_score":0.9322},{"id":4166,"text":"Love it. Well done! Proud of you. Love it.","sentiment_class":"positive","sentiment_score":0.9312},{"id":5145,"text":"Love it. Proud of you. Like it a lot! Good work.","sentiment_class":"positive","sentiment_score":0.9184},{"id":15580,"text":"Like it a lot! Enjoy! Love it. Well done!","sentiment_class":"positive","sentiment_score":0.9165},{"id":15087,"text":"Like it a lot! Like it a lot! Good work. Awesome.","sentiment_class":"positive","sentiment_score":0.9115},{"id":19766,"text":"Pro