In [110]:
import json

import pandas as pd

import plotly.express as px
import plotly.io as pio

## Data Scope
- Period covered: `2017-01-01` to `2022-03-01`
- Searching 4 keywords ["#นัดเย็ดกทม", "#onsกทม", "#fwbกทม", "#นัดเย็ดกรุงเทพ"]

### All User Data

In [111]:
# Load the raw user export and discard the "Unnamed: 0" column.
# NOTE(review): "Unnamed: 0" is presumably an index written by an earlier to_csv — confirm.
df_user = (
    pd.read_csv("data/total_user_data_20170101_20220301.csv")
    .drop(columns="Unnamed: 0")
)

In [112]:
# Keep a single row per account `id`, parse the creation date into a real
# datetime, and derive a "YYYY-MM" bucket used for the monthly aggregation.
df_user = (
    df_user
    .drop_duplicates(subset="id")
    .assign(created_at_date_thtz=lambda d: pd.to_datetime(d['created_at_date_thtz']))
    .assign(created_year_month=lambda d: d['created_at_date_thtz'].dt.strftime('%Y-%m'))
)

In [113]:
# Number of accounts per creation date (non-null `id` count per day).
num_by_day = (
    df_user
    .groupby("created_at_date_thtz")["id"]
    .count()
    .reset_index()
)

# Daily trend line; the figure is the cell's displayed result.
px.line(
    num_by_day,
    x="created_at_date_thtz",
    y="id",
    title="Number of created account : Daily All keywords",
    width=900,
    height=400,
)

In [114]:
# Number of accounts per creation month (non-null `id` count per "YYYY-MM").
num_by_yearmonth = (
    df_user
    .groupby("created_year_month")["id"]
    .count()
    .reset_index()
)

# Monthly trend line; the figure is the cell's displayed result.
px.line(
    num_by_yearmonth,
    x="created_year_month",
    y="id",
    title="Number of created account : Monthly All keywords",
    width=900,
    height=400,
)

## Tweet Data

In [115]:
# Load the raw tweet export; the cleaning steps live in the following cell.
df_tweet = pd.read_csv(
    filepath_or_buffer="data/total_tweet_data_20170101_20220301.csv",
)

In [116]:
# Data cleaning
# - Drop the unrelated "Unnamed: 0" column
# - Drop duplicate tweet ids
# - Convert the created date to "Year-Month" for aggregation
# - Exclude the latest month (2022-03) because its data is incomplete
#
# The cell is safe to re-run: `errors="ignore"` keeps the column drop from
# raising KeyError once "Unnamed: 0" is already gone, and every step returns a
# new frame instead of mutating the loaded one in place.
df_tweet = (
    df_tweet
    .drop(columns=["Unnamed: 0"], errors="ignore")
    .drop_duplicates(subset="id")
    .assign(created_at_date_thtz=lambda d: pd.to_datetime(d['created_at_date_thtz']))
    .assign(created_year_month=lambda d: d['created_at_date_thtz'].dt.strftime('%Y-%m'))
)

# Drop the latest month out due to incomplete data.
# `.copy()` detaches the filtered rows so later column assignments on
# `df_tweet` do not trigger SettingWithCopyWarning. The original row labels
# are kept (no reset_index).
df_tweet = df_tweet[df_tweet['created_year_month'] != '2022-03'].copy()

### Overall Number of Tweets

In [117]:
# Tweets per month, with presentation-friendly column names for the chart.
num_by_monthly = (
    df_tweet
    .groupby("created_year_month")["id"]
    .count()
    .reset_index(name="Number of Tweet")
    .rename(columns={"created_year_month": "Tweet Date"})
)

# Set the default plotly template, then draw the monthly total as the cell output.
pio.templates.default = "simple_white"
px.line(
    num_by_monthly,
    x="Tweet Date",
    y="Number of Tweet",
    title="Number of Tweet : Monthly Total",
    width=900,
    height=500,
)

In [118]:
# Tweets per (month, keyword), with presentation-friendly column names.
num_by_keymonthly = (
    df_tweet
    .groupby(["created_year_month", "keywords"])["id"]
    .count()
    .reset_index(name="Number of Tweet")
    .rename(columns={"keywords": "#hashtag",
                     "created_year_month": "Tweet Date"})
)

# One line per hashtag; the figure object is kept so it can be exported later.
pio.templates.default = "simple_white"
num_by_keymonthly_line = px.line(
    num_by_keymonthly,
    x="Tweet Date",
    y="Number of Tweet",
    color="#hashtag",
    title="Number of Tweet : Monthly by keywords",
    width=900,
    height=500,
)
num_by_keymonthly_line
# Optional HTML export of the figure (left disabled):
#num_by_keymonthly_line.write_html("Number of Tweet - Monthly Keywords.html")

### Location Analysis

In [119]:
 # Exploratory attempt to expand `geo` into columns via pd.Series.
 # NOTE(review): a later cell calls .replace() on these values, so they look
 # like strings rather than dicts; the result here is a single column `0`
 # instead of one column per field — confirm before relying on it.
 df_tweet.geo.apply(pd.Series)

Unnamed: 0,0
461,
462,
463,
464,
465,
...,...
953341,
953342,
953343,
953344,


In [104]:
# Second exploratory attempt: flatten the `geo` column with json_normalize.
# NOTE(review): the result carries a fresh 0..n-1 index, not df_tweet's row labels.
pd.json_normalize(df_tweet["geo"])

Unnamed: 0,0
0,
1,
2,
3,
4,
...,...
768626,
768627,
768628,
768629,


In [103]:
# Frequency table of the distinct `geo` values, most frequent first
# (value_counts skips missing values).
#
# rename_axis + reset_index(name=...) pins the output columns to
# "geo" / "occurence" regardless of the installed pandas version. The earlier
# rename mapping {"index": "geo", "geo": "occurence"} only worked on
# pandas < 2.0; from 2.0 on, value_counts names its result "count" and keeps
# the column name on the index, so that mapping mislabelled the columns and
# made `geo_location['geo']` raise KeyError.
geo_location = (
    df_tweet['geo']
    .value_counts()
    .rename_axis("geo")
    .reset_index(name="occurence")
)

# Each geo value is a string holding a single-quoted dict; swap the quotes so
# the json module can parse it, and print the parsed dict for inspection.
for geo_dict in geo_location['geo']:
    print(json.loads(geo_dict.replace("'", "\"")))

# A bare expression is only rendered when it is the last statement of the
# cell, so the table is shown here rather than before the loop.
geo_location

Unnamed: 0,geo,occurence
0,{'place_id': '00f1a9463f5b8801'},384
1,{'place_id': '018cb268038d69d0'},245
2,{'place_id': '49c909a0270e8699'},211
3,{'place_id': '015608e02abc1bca'},206
4,{'place_id': '0149a7b738015c7b'},154
...,...,...
1148,{'place_id': '07d9f780adc86000'},1
1149,{'place_id': '001cce0f542c67b1'},1
1150,{'place_id': '07d9cacee8888000'},1
1151,{'place_id': '0fc294d25b946002'},1


In [96]:
# Sanity check: parse the geo string in the row labelled 0 of the frequency
# table (single quotes are swapped for double quotes so json accepts it).
json.loads(geo_location.loc[0, 'geo'].replace("'", "\""))

{'place_id': '00f1a9463f5b8801'}

In [76]:
# Display the cleaned tweet frame; for a frame this large pandas renders only
# the first and last rows, with an ellipsis row in between.
df_tweet

Unnamed: 0,reply_settings,author_id,source,lang,created_at,text,conversation_id,public_metrics,id,referenced_tweets,geo,in_reply_to_user_id,created_at_dt,created_at_time,created_at_date,created_at_dt_thtz,created_at_time_thtz,created_at_date_thtz,keywords,created_year_month
461,everyone,1475272800675893251,Twitter for Android,th,2022-02-28T16:59:51.000Z,RT @HostManx: ชอบฟีลถอดชุดเวลาทำรักกัน ❤️\n#ชา...,1498342195510013953,"{'retweet_count': 402, 'reply_count': 0, 'like...",1498342195510013953,"[{'type': 'retweeted', 'id': '1489488323140005...",,,2022-02-28 16:59:51+00:00,16:59:51,2022-02-28,2022-02-28 23:59:51+07:00,23:59:51,2022-02-28,fwbกทม,2022-02
462,everyone,1377086137487876102,Twitter for iPhone,th,2022-02-28T16:59:24.000Z,RT @ig123igmay: รับเสือดาวไปเลี้ยงในห้องไหม #เ...,1498342080891944962,"{'retweet_count': 70, 'reply_count': 0, 'like_...",1498342080891944962,"[{'type': 'retweeted', 'id': '1498255051231744...",,,2022-02-28 16:59:24+00:00,16:59:24,2022-02-28,2022-02-28 23:59:24+07:00,23:59:24,2022-02-28,fwbกทม,2022-02
463,everyone,1185581701403004928,Twitter for iPhone,th,2022-02-28T16:59:14.000Z,RT @singha25201: หลังเลิกงาน วันนี้มีเฮ\nhttps...,1498342040274608128,"{'retweet_count': 843, 'reply_count': 0, 'like...",1498342040274608128,"[{'type': 'retweeted', 'id': '1489901362427363...",,,2022-02-28 16:59:14+00:00,16:59:14,2022-02-28,2022-02-28 23:59:14+07:00,23:59:14,2022-02-28,fwbกทม,2022-02
464,everyone,1250005045157842946,Twitter Web App,th,2022-02-28T16:58:58.000Z,ดึกๆมันเงี่ยน ห้องว่างนะครับ18Y Dmมาเลยครับลับ...,1498341973329018881,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",1498341973329018881,,,,2022-02-28 16:58:58+00:00,16:58:58,2022-02-28,2022-02-28 23:58:58+07:00,23:58:58,2022-02-28,fwbกทม,2022-02
465,everyone,1111203759906906113,Twitter for Android,th,2022-02-28T16:56:54.000Z,RT @plasalmin: จุ๊บ จุ๊บ คนขี้เงี่ยน #เงี่ยน 💋...,1498341450245095424,"{'retweet_count': 119, 'reply_count': 0, 'like...",1498341450245095424,"[{'type': 'retweeted', 'id': '1407958661595492...",,,2022-02-28 16:56:54+00:00,16:56:54,2022-02-28,2022-02-28 23:56:54+07:00,23:56:54,2022-02-28,fwbกทม,2022-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953341,everyone,968824049148116992,Twitter Web App,th,2018-02-28T12:35:54.000Z,เงี่ยนเหงาทักเราได้น่ะ #คุยเสี่ยว #นัดเย็ดกรุ...,968827098730151936,"{'retweet_count': 2, 'reply_count': 7, 'like_c...",968827098730151936,,,,2018-02-28 12:35:54+00:00,12:35:54,2018-02-28,2018-02-28 19:35:54+07:00,19:35:54,2018-02-28,นัดเย็ดกรุงเทพ,2018-02
953342,everyone,918108332132515842,Twitter for iPhone,th,2018-01-16T16:39:49.000Z,นัดเย็ดกรุงเทพครับ,953305803615698945,"{'retweet_count': 0, 'reply_count': 5, 'like_c...",953305803615698945,,,,2018-01-16 16:39:49+00:00,16:39:49,2018-01-16,2018-01-16 23:39:49+07:00,23:39:49,2018-01-16,นัดเย็ดกรุงเทพ,2018-01
953343,everyone,923965091522887680,Twitter for Android,und,2017-10-29T06:48:36.000Z,#สาวสอง #เย็ด #เย็ดตูด #นัดเย็ดกรุงเทพ https:/...,924528374894096384,"{'retweet_count': 0, 'reply_count': 10, 'like_...",924528374894096384,,,,2017-10-29 06:48:36+00:00,06:48:36,2017-10-29,2017-10-29 13:48:36+07:00,13:48:36,2017-10-29,นัดเย็ดกรุงเทพ,2017-10
953344,everyone,887770985994240000,Twitter for Android,th,2017-07-19T21:09:23.000Z,นัดเย็ดกรุงเทพ ..เลียหีจนร้องขอชีวิต,887781438908542976,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",887781438908542976,,,,2017-07-19 21:09:23+00:00,21:09:23,2017-07-19,2017-07-20 04:09:23+07:00,04:09:23,2017-07-20,นัดเย็ดกรุงเทพ,2017-07
