In [1]:
import html
import json
import os
import re
import shutil
from datetime import datetime as dt

import pandas as pd
import requests
import wget

In [2]:
# Read the Slack API token from a local file kept outside the notebook.
# The with-block closes the file handle, and strip() removes surrounding
# whitespace (e.g. a trailing newline) that would otherwise be embedded
# in every request URL built from `token`.
with open("../token.txt", "r") as token_file:
    token = token_file.read().strip()

# Get channels and their IDs
Info: https://api.slack.com/methods/channels.list/test <br>
https://api.slack.com/methods/users.info/test

In [3]:
def get_user_name(user_id):
    """Get a user's name given their Slack ID.

    Calls Slack's ``users.info`` method with the module-level ``token`` and
    reads ``real_name_normalized`` from the decoded JSON response.

    Parameters
    ----------
    user_id : str
        Slack user ID to look up.

    Returns
    -------
    str or None
        The user's normalized real name, or ``None`` when Slack reports
        ``user_not_found`` or the response carries no such field.

    Raises
    ------
    RuntimeError
        If Slack reports any other error.
    """
    address = f"https://slack.com/api/users.info?token={token}&user={user_id}&pretty=1"
    # Decode the JSON instead of slicing the repr of the raw bytes: the repr
    # escapes non-ASCII characters and the old comma/quote splitting truncated
    # names that contain those characters.
    payload = requests.get(address).json()
    if not payload.get("ok", False):
        if payload.get("error") == "user_not_found":
            return None
        raise RuntimeError(f"users.info failed for {user_id}: {payload.get('error')}")
    user = payload.get("user") or {}
    profile = user.get("profile") or {}
    # Look in the profile first, then on the user object itself, since the
    # previous text search accepted the field wherever it appeared.
    return profile.get("real_name_normalized", user.get("real_name_normalized"))

In [4]:
def get_channels():
    """Return a data frame of all the public channels in your Slack workspace.

    Queries Slack's ``channels.list`` method with the module-level ``token``,
    orders the channels by their ``created`` timestamp and resolves each
    creator ID to a name with ``get_user_name``.

    Returns
    -------
    pandas.DataFrame
        One row per channel with columns ``channel_id``, ``channel_name``,
        ``created_date`` (``YYYY-MM-DD`` string) and ``created_by`` (creator
        name, or ``None`` when the lookup returns ``None``).
    """
    address = f"https://slack.com/api/channels.list?token={token}&pretty=1"
    channels = sorted(requests.get(address).json()['channels'], key=lambda k: k['created'])
    creator_names = {}  # creator ID -> name, so each creator is looked up only once
    rows = []
    for channel in channels:
        creator_id = channel['creator']
        if creator_id not in creator_names:
            creator_names[creator_id] = get_user_name(creator_id)
        created_date = dt.fromtimestamp(channel['created']).strftime('%Y-%m-%d')
        rows.append([channel['id'], channel['name'], created_date, creator_names[creator_id]])
    # Passing the column names to the constructor also works when `rows` is
    # empty; assigning `.columns` afterwards raises a length mismatch there.
    return pd.DataFrame(rows, columns=['channel_id', 'channel_name', 'created_date', 'created_by'])

In [5]:
# Build the channel table by querying the Slack API.
channels = get_channels()

In [6]:
# Save the channel table to CSV, omitting the index column.
channels.to_csv("../info/channels.csv", index=False)

In [7]:
# Load the labeled channel list and keep only the rows flagged keep == 1.
channels = pd.read_csv("../info/channels_labeled_google.csv")
channels = channels[channels['keep']==1].drop(['keep'], axis=1).reset_index(drop=True)
# Hide last names: keep only the first whitespace-separated token. The .str
# accessor passes missing creators (NaN) through instead of raising on them.
channels['created_by'] = channels['created_by'].str.split().str[0]
# Folder name = lower-cased course name with whitespace runs replaced by "_".
channels['folder_name'] = channels['actual_name'].str.lower().str.split().str.join("_")
channels

Unnamed: 0,channel_id,channel_name,created_date,created_by,folder,actual_name,folder_name
0,C0GQ664PQ,general,2015-12-16,Paul,others,general,general
1,C0GQ0GH1S,random,2015-12-16,Paul,others,random,random
2,CBH1274KV,msan501,2018-07-01,Terence,mod1_summer,Computation for Analytics,computation_for_analytics
3,CBL5QGXCG,msan502-2018,2018-07-08,Kirsten,mod1_summer,Linear Algebra,linear_algebra
4,CBMV8E51R,msan504_2018,2018-07-10,Jeff,mod1_summer,Probability and Statistics,probability_and_statistics
5,CBN6PBL9X,msds_593_2018,2018-07-11,Paul,mod1_summer,Exploratory Data Analysis with R,exploratory_data_analysis_with_r
6,CBUMPR80J,msds_610_2018,2018-07-21,Brian,mod2_fall1,Communications for Analytics,communications_for_analytics
7,CC7KMCM1D,msds692,2018-08-10,Terence,mod2_fall1,Data Acquisition,data_acquisition
8,CC7S5NSE7,msds691_2018,2018-08-13,Nick,mod2_fall1,Relational Databases (SQL),relational_databases_(sql)
9,CCBDZ180N,msan601_2018,2018-08-16,Jeff,mod2_fall1,Linear Regression Analysis,linear_regression_analysis


In [8]:
# Save the filtered channel table (with the derived folder_name column), omitting the index.
channels.to_csv("../info/channels_labeled.csv", index=False)

# Get all history from channels
Info: https://api.slack.com/methods/channels.history/test

In [9]:
# Unix timestamp (seconds) passed as the `oldest` bound when requesting channel history.
init_date = 1530835200 # July 6th, 2018

In [10]:
def get_channel_history(channel, oldest=None):
    """Return the list of messages posted in the given channel.

    Calls Slack's ``channels.history`` method repeatedly, asking for up to
    1000 messages per request, until the response no longer sets
    ``has_more``. After each page, the ``ts`` of that page's first message
    becomes the ``oldest`` bound of the next request.

    Parameters
    ----------
    channel : str
        Slack channel ID.
    oldest : optional
        Timestamp to start from. Defaults to the module-level ``init_date``.

    Returns
    -------
    list
        The message objects returned by Slack; every later page is placed in
        front of the pages fetched before it. Empty when the channel has no
        messages in the requested range.

    Raises
    ------
    RuntimeError
        If Slack answers with ``ok`` set to false.
    """
    def fetch_page(since):
        # Single place that builds the request URL and validates the reply.
        address = f"https://slack.com/api/channels.history?token={token}&channel={channel}&count={1000}&oldest={since}%20&pretty=1"
        page = requests.get(address).json()
        if not page.get('ok', False):
            raise RuntimeError(f"channels.history failed for {channel}: {page.get('error')}")
        return page

    page = fetch_page(init_date if oldest is None else oldest)
    messages = page.get('messages', [])
    # Stop on an empty page as well: there is no timestamp to continue from,
    # and indexing [0] on it would raise IndexError.
    while page.get('has_more') and page.get('messages'):
        page = fetch_page(page['messages'][0]['ts'])
        messages = page.get('messages', []) + messages
    return messages

In [11]:
# Write each channel's history to ../data/<folder>/<folder_name>.json
for channel, folder, folder_name in zip(channels['channel_id'], channels['folder'], channels['folder_name']):
    path1 = "../data/" + folder
    path2 = path1 + "/" + folder_name + ".json"
    # makedirs creates missing parents (../data) and tolerates an existing folder.
    os.makedirs(path1, exist_ok=True)
    # Fetch first so a failed request does not leave an empty file behind.
    history = get_channel_history(channel)
    # The with-block guarantees the JSON is flushed and the handle closed.
    with open(path2, "w") as history_file:
        json.dump(history, history_file)

# Get members

In [12]:
# Messages as of Apr 26, 2019
# (This could be improved by getting all the users from all the messages in the data folder)
# Load the message snapshot; its `messages` column is scanned for user IDs in the next cells.
channels_hist = pd.read_csv("../info/channels_messages04262019.csv")

In [13]:
# Concatenate the full text of every row. str() on the underlying NumPy array
# abbreviates arrays of more than 1000 elements with "...", which would
# silently drop the user IDs mentioned in the elided rows.
messages = "\n".join(channels_hist.messages.astype(str))

In [14]:
# Distinct substrings shaped like a "U" followed by eight uppercase letters/digits.
user_id_pattern = re.compile("[U][A-Z0-9]{8}")
users = set(user_id_pattern.findall(str(messages)))

In [15]:
# Resolve every candidate ID to a name; IDs for which the lookup returns None are skipped.
members = []
for user in users:
    name = get_user_name(user)  # one request per user; the result is reused below
    if name is not None:
        members.append([user, name])

# Naming the columns in the constructor also works when no member was found.
members = pd.DataFrame(members, columns=['member_id', 'member_name'])

In [16]:
# Save the member ID/name table to CSV, omitting the index column.
members.to_csv("../info/members.csv", index=False)

In [17]:
# Replace `members` with the labeled CSV; the cells below read its member_type and country columns.
members = pd.read_csv("../info/members_labeled.csv")

In [18]:
# Number of members per member_type label.
members.member_type.value_counts()

student        83
guest          28
professor      18
deactivated    12
bot             3
Name: member_type, dtype: int64

In [19]:
# Country counts restricted to rows whose member_type is "student".
members[members["member_type"]=="student"].country.value_counts()

China        31
USA          29
India         7
Taiwan        2
Korea         1
England       1
Spain         1
Colombia      1
Canada        1
Germany       1
Vietnam       1
Singapore     1
Peru          1
Armenia       1
Brazil        1
Malaysia      1
Tunisia       1
Russia        1
Name: country, dtype: int64

# Get emojis
Info: https://api.slack.com/methods/emoji.list/test <br>
https://stackoverflow.com/questions/39490865/how-can-i-get-the-full-list-of-slack-emoji-through-api/39654939 <br>
https://unicodey.com/emoji-data/table.htm

### Get default emojis

In [20]:
def get_default_emojis():
    """Download the default emoji images and return a table describing them.

    Recreates ``../info/default_emojis/`` from scratch, reads the
    ``short_name``/``image`` pairs from ``../info/all_default_emojis.json``
    and downloads every image from unicodey.com into that folder.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``image`` (local file path, or ``None`` when the
        download failed) and ``type`` (always ``"default"``).
    """
    if os.path.exists('../info/default_emojis/'):
        shutil.rmtree('../info/default_emojis/')
    os.makedirs('../info/default_emojis/')
    # Close the JSON file as soon as it has been parsed.
    with open("../info/all_default_emojis.json") as emoji_file:
        default_emojis = [(emoji['short_name'], emoji['image']) for emoji in json.load(emoji_file)]
    emojis = []
    for name, image in default_emojis:
        url = f"https://unicodey.com/emoji-data/img-apple-64/{image}"
        ext = url.split('.')[-1]
        path = f"../info/default_emojis/{name}.{ext}"
        try:
            wget.download(url, path)
        except Exception:
            # Best effort: a failed download is recorded with no image path.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # still stop a long download run.
            path = None
        emojis.append([name, path, "default"])
    return pd.DataFrame(emojis, columns=['name','image','type'])

In [21]:
# Download the default emoji images (re-creating ../info/default_emojis/) and build their table.
default_emojis = get_default_emojis()

In [22]:
# Preview the first rows of the default emoji table.
default_emojis.head()

Unnamed: 0,name,image,type
0,hash,../info/default_emojis/hash.png,default
1,keycap_star,../info/default_emojis/keycap_star.png,default
2,zero,../info/default_emojis/zero.png,default
3,one,../info/default_emojis/one.png,default
4,two,../info/default_emojis/two.png,default


In [23]:
# Save the default emoji table to CSV, omitting the index column.
default_emojis.to_csv("../info/default_emojis.csv", index=False)

### Get custom emojis

In [24]:
def get_custom_emojis():
    """Download the workspace's custom emojis and return a table describing them.

    Recreates ``../info/custom_emojis/`` from scratch and queries Slack's
    ``emoji.list`` method with the module-level ``token``. Entries whose value
    is an image URL are downloaded. Entries of the form ``alias:<name>`` reuse
    the image of an emoji processed earlier in this call or, failing that, the
    image recorded for ``<name>`` in the module-level ``default_emojis`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``image`` (local file path, or ``None`` for an alias
        whose target could not be resolved) and ``type`` (``"custom"`` for
        downloaded images and aliases of entries processed earlier,
        ``"default"`` for aliases looked up in ``default_emojis``).
    """
    if os.path.exists('../info/custom_emojis/'):
        shutil.rmtree('../info/custom_emojis/')
    os.makedirs('../info/custom_emojis/')
    address = f"https://slack.com/api/emoji.list?token={token}&pretty=1"
    # Descending sort on the value places "https://..." image URLs ahead of
    # "alias:..." entries, so images are downloaded before aliases refer to them.
    custom_emojis = sorted(requests.get(address).json()['emoji'].items(), key=lambda k: k[1], reverse=True)
    emojis = []
    resolved = {}  # emoji name -> image path, for every entry handled so far
    for name, url in custom_emojis:
        # Test the prefix, not a substring: an image URL may contain "alias".
        if not url.startswith("alias:"):
            ext = url.split('.')[-1]
            path = f"../info/custom_emojis/{name}.{ext}"
            wget.download(url, path)
            kind = "custom"
        else:
            target = url.split(":")[1]
            if target in resolved:
                # Reuse the target's image path (not its whole table row).
                path = resolved[target]
                kind = "custom"
            else:
                matches = default_emojis.loc[default_emojis['name'] == target, 'image'].values
                # An unknown target is recorded without an image instead of raising.
                path = matches[0] if len(matches) else None
                kind = "default"
        resolved[name] = path
        emojis.append([name, path, kind])
    return pd.DataFrame(emojis, columns=['name','image','type'])

In [25]:
# Download the custom emoji images and build their table; alias lookups read `default_emojis` from above.
custom_emojis = get_custom_emojis()

In [26]:
# Preview the first rows of the custom emoji table.
custom_emojis.head()

Unnamed: 0,name,image,type
0,zombie_walking,../info/custom_emojis/zombie_walking.gif,custom
1,zoidberg,../info/custom_emojis/zoidberg.jpg,custom
2,young_jeff,../info/custom_emojis/young_jeff.jpg,custom
3,yoga_flow,../info/custom_emojis/yoga_flow.gif,custom
4,yodawg,../info/custom_emojis/yodawg.png,custom


In [27]:
# Save the custom emoji table to CSV, omitting the index column.
custom_emojis.to_csv("../info/custom_emojis.csv", index=False)

### Join emojis

In [28]:
# Stack the default table on top of the custom table (same name/image/type columns).
emojis = pd.concat([default_emojis,custom_emojis])

In [29]:
# Preview the first rows of the combined emoji table.
emojis.head()

Unnamed: 0,name,image,type
0,hash,../info/default_emojis/hash.png,default
1,keycap_star,../info/default_emojis/keycap_star.png,default
2,zero,../info/default_emojis/zero.png,default
3,one,../info/default_emojis/one.png,default
4,two,../info/default_emojis/two.png,default


In [30]:
# Number of emojis per type label.
emojis.type.value_counts()

default    1571
custom      586
Name: type, dtype: int64

In [31]:
# Save the combined emoji table to CSV, omitting the index column.
emojis.to_csv("../info/emojis.csv", index=False)

#### Auxiliary function

In [32]:
def get_emoji_html(emoji_shortname):
    """Return the Unicode character(s) of a default emoji given its short name.

    Looks the short name up in ``../info/all_default_emojis.json`` and converts
    the entry's ``unified`` field (hexadecimal code points joined by ``-``)
    into the corresponding string.

    Parameters
    ----------
    emoji_shortname : str
        Value to match against each entry's ``short_name``.

    Returns
    -------
    str
        The characters for the first entry with that short name.

    Raises
    ------
    IndexError
        If no entry has the given short name.
    """
    # Close the JSON file as soon as it has been parsed.
    with open("../info/all_default_emojis.json") as emoji_file:
        emojis = json.load(emoji_file)
    emoji = next((entry for entry in emojis if entry['short_name'] == emoji_shortname), None)
    if emoji is None:
        # Same exception type as the former bare [0], now with an explanation.
        raise IndexError(f"unknown emoji short name: {emoji_shortname!r}")
    # Each code point becomes an HTML numeric character reference, which
    # html.unescape turns into the actual character.
    references = "".join(f"&#x{code_point};" for code_point in emoji['unified'].split('-'))
    return html.unescape(references)

In [33]:
# Example lookup for the short name "flag-co".
get_emoji_html("flag-co")

'🇨🇴'

Spice it up with: https://slackmojis.com/