In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [330]:
import os
import sys
import pandas as pd
import numpy as np
import importlib
import json
import hashlib

# Link Reapply Workflow
# Absolute path of the parent directory ('..' resolved against the current
# working directory).
module_path = os.path.abspath('..')

# Drop every existing occurrence of that path and re-add it as the last
# sys.path entry, so it is present exactly once.
sys.path = [entry for entry in sys.path if entry != module_path] + [module_path]
# The parent directory is now on sys.path, so reapply_workflows can be imported,
# e.g. 'from reapply_workflows import Reapply'.
%aimport reapply_workflows

from reapply_workflows import Reapply

In [337]:
def add_ids(data: pd.DataFrame, label: str) -> pd.DataFrame:
    """Return a rounded copy of ``data`` with ``iid`` and ``id`` columns prepended.

    The input frame is never modified. Values are rounded to 5 decimals, the
    existing index is discarded in favour of a fresh 0..n-1 index, and two
    columns are inserted in front of the original ones (final order:
    ``iid``, ``id``, then the original columns):

    * ``id``  - ``getUIDForString`` of the row's ``label`` value, so it changes
      when the row label changes.
    * ``iid`` - ``getUIDForString`` of all row values (``id`` included) joined
      with ``"_"``, so it changes when any value in the row changes.

    Args:
        data: Frame to annotate.
        label: Name of the column whose value identifies a row.

    Returns:
        A new DataFrame; an empty input yields an empty frame that still has
        the ``iid`` and ``id`` columns.

    Raises:
        ValueError: If the computed ``id`` (or ``iid``) values are not unique.
        KeyError: If ``label`` is not a column of ``data``.
    """
    # round() and reset_index() both return new frames, so the caller's
    # DataFrame is left untouched without an explicit copy().
    result = data.round(5).reset_index(drop=True)

    # Add ids which change on row label change
    _prepend_hash_column(result, "id", lambda row: str(row[label]))

    # Add iids which change on value change
    _prepend_hash_column(
        result, "iid", lambda row: "_".join(row.values.astype(str))
    )

    return result


def _prepend_hash_column(frame: pd.DataFrame, name: str, to_text) -> None:
    """Insert ``getUIDForString(to_text(row))`` for every row as first column ``name``."""
    if len(frame) == 0:
        # apply(axis=1) on a frame without rows returns a DataFrame instead of
        # a Series, so build the (empty) hash column explicitly.
        hashes = pd.Series([], index=frame.index, dtype=object)
    else:
        hashes = frame.apply(lambda row: getUIDForString(to_text(row)), axis=1)

    duplicated = hashes[hashes.duplicated()]
    if not duplicated.empty:
        raise ValueError(
            f"{name} values are not unique: {duplicated.unique().tolist()}"
        )

    # insert() places the column directly under its final name. The previous
    # reset_index()/rename({"index": ...}) round trip mislabelled columns when
    # the input already had a column called "index". allow_duplicates keeps
    # accepting inputs that already carry an "id"/"iid" column.
    frame.insert(0, name, hashes, allow_duplicates=True)

def getUIDForString(toHash: str):
    """Return the hexadecimal MD5 digest of ``toHash``.

    The string is encoded with ``str.encode()``'s default encoding before
    hashing.
    """
    encoded = toHash.encode()
    return hashlib.md5(encoded).hexdigest()


In [338]:
# Read the gapminder CSV and prepend the iid/id hash columns, using the
# 'country' column as the row label.
with open("./gapminder_1980.csv") as f:
    data = add_ids(pd.read_csv(f), 'country')
data.head()

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.5,africa,8340000.0,47.6,3180.0,238.0
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8


# Selections

In [339]:
# Replay the interactions stored in selections.json on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./selections.json") as f:
    wf = json.loads(f.read())

recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr,_Selections
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8,False
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0,True
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.50,africa,8340000.0,47.6,3180.0,238.0,True
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8,False
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8,False
...,...,...,...,...,...,...,...,...,...,...
124,9ac3256452dbed23b59321a91146900e,75497a22409db78dcc52c291e078bc10,Uruguay,2.73,americas,2920000.0,70.4,7360.0,39.7,False
125,b7b2832abd95dfaab237fb9314167ed3,fe07e488fe56260f45240c1c8a8882a8,Vanuatu,5.58,asia,116000.0,61.0,2070.0,68.7,False
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.20,americas,15200000.0,69.4,14300.0,43.9,False
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.09,africa,5850000.0,56.0,1290.0,156.0,True


# Filter

In [340]:
# Replay the interactions stored in filter.json on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./filter.json") as f:
    wf = json.loads(f.read())

# Also a preview
recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr,Filtered
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8,True
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0,False
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.50,africa,8340000.0,47.6,3180.0,238.0,False
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8,True
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8,True
...,...,...,...,...,...,...,...,...,...,...
124,9ac3256452dbed23b59321a91146900e,75497a22409db78dcc52c291e078bc10,Uruguay,2.73,americas,2920000.0,70.4,7360.0,39.7,True
125,b7b2832abd95dfaab237fb9314167ed3,fe07e488fe56260f45240c1c8a8882a8,Vanuatu,5.58,asia,116000.0,61.0,2070.0,68.7,True
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.20,americas,15200000.0,69.4,14300.0,43.9,True
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.09,africa,5850000.0,56.0,1290.0,156.0,False


# Label

In [335]:
# Replay the interactions stored in label.json on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./label.json") as f:
    wf = json.loads(f.read())

# Author notes (open questions, kept as written):
# Empty string
# Showing multiple labels? ->
recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr,Labels
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8,Unassigned
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0,test_label_1
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.50,africa,8340000.0,47.6,3180.0,238.0,test_label_1
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8,Unassigned
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8,test_label_2
...,...,...,...,...,...,...,...,...,...,...
124,9ac3256452dbed23b59321a91146900e,75497a22409db78dcc52c291e078bc10,Uruguay,2.73,americas,2920000.0,70.4,7360.0,39.7,test_label_2
125,b7b2832abd95dfaab237fb9314167ed3,fe07e488fe56260f45240c1c8a8882a8,Vanuatu,5.58,asia,116000.0,61.0,2070.0,68.7,Unassigned
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.20,americas,15200000.0,69.4,14300.0,43.9,Unassigned
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.09,africa,5850000.0,56.0,1290.0,156.0,test_label_1


# Categorize

In [336]:
# Replay the interactions stored in categorize.json on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./categorize.json") as f:
    wf = json.loads(f.read())

recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr,Test Category
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8,
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0,Option 1
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.50,africa,8340000.0,47.6,3180.0,238.0,Option 1
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8,
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8,Option 2
...,...,...,...,...,...,...,...,...,...,...
124,9ac3256452dbed23b59321a91146900e,75497a22409db78dcc52c291e078bc10,Uruguay,2.73,americas,2920000.0,70.4,7360.0,39.7,Option 2
125,b7b2832abd95dfaab237fb9314167ed3,fe07e488fe56260f45240c1c8a8882a8,Vanuatu,5.58,asia,116000.0,61.0,2070.0,68.7,
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.20,americas,15200000.0,69.4,14300.0,43.9,
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.09,africa,5850000.0,56.0,1290.0,156.0,Option 1


In [328]:
# Replay the interactions stored in "categorize 2.json" on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./categorize 2.json") as f:
    wf = json.loads(f.read())

recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr,Test Category,Test Category 2
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.62,europe,2680000.0,71.7,1990.0,80.8,,
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.79,africa,19200000.0,62.8,3640.0,149.0,Option 1,
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.50,africa,8340000.0,47.6,3180.0,238.0,Option 1,
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.12,americas,61900.0,71.1,6370.0,44.8,,
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.33,americas,27900000.0,70.1,7910.0,42.8,,Option Category 2
...,...,...,...,...,...,...,...,...,...,...,...
124,9ac3256452dbed23b59321a91146900e,75497a22409db78dcc52c291e078bc10,Uruguay,2.73,americas,2920000.0,70.4,7360.0,39.7,,Option Category 2
125,b7b2832abd95dfaab237fb9314167ed3,fe07e488fe56260f45240c1c8a8882a8,Vanuatu,5.58,asia,116000.0,61.0,2070.0,68.7,,
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.20,americas,15200000.0,69.4,14300.0,43.9,,
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.09,africa,5850000.0,56.0,1290.0,156.0,Option 1,


# Aggregation

In [329]:
# Replay the interactions stored in agg.json on `data` and display
# the `df` of the last element returned by Reapply.apply.
r = Reapply()

with open("./agg.json") as f:
    wf = json.loads(f.read())

# Author notes (kept as written):
# Add aggregate to ids
# Take a label for aggregate
# No need for showing aggregate by info for columns
recs = r.apply(wf['interactions'], data)
recs[-1].df

Unnamed: 0,iid,id,country,tfr,continent,population,life_exp,gdp,cmr
0,72446c56b549652b274c6e9181558711,c420ddff824a5c0eec70dd23d62496bc,Albania,3.620000,europe,2.680000e+06,71.700000,1.990000e+03,80.800000
1,429079562aaef4ceb00dc9aa90df68e4,575b9408b6daa2ddcefbcf6d81c9b4c9,Algeria,6.790000,africa,1.920000e+07,62.800000,3.640000e+03,149.000000
2,f17777056433e867b34a50ddbbe6bd43,232bf11cb81bcdb269f76a08fde8b947,Angola,7.500000,africa,8.340000e+06,47.600000,3.180000e+03,238.000000
3,3e0b86945744fcc37eb53c67f42f5521,f7e68bf0791888ebcd5bfc62e022aa83,Antigua and Barbuda,2.120000,americas,6.190000e+04,71.100000,6.370000e+03,44.800000
4,ac1e36a436482b405a8621aae3801634,3536be57ce0713954e454ae6c53ec023,Argentina,3.330000,americas,2.790000e+07,70.100000,7.910000e+03,42.800000
...,...,...,...,...,...,...,...,...,...
126,57e57d86a993a865423dfa433bd7b2a6,e95294b730f61c8175550ec244bfcb50,Venezuela,4.200000,americas,1.520000e+07,69.400000,1.430000e+04,43.900000
127,fc6ba0e72ab596b851ba1026d4ebefd7,b0aa0804e676a38255af4fd70236af7c,Zambia,7.090000,africa,5.850000e+06,56.000000,1.290000e+03,156.000000
128,ded8203952f9e799b681036a7f243cf1,9d5116a2451bc98c2b46b93acbc1b4f0,Zimbabwe,7.100000,africa,7.410000e+06,59.700000,1.270000e+03,107.000000
0,agg25244154 (Mean),agg25244154 (Mean),agg25244154 (Mean),4.649457,agg25244154 (Mean),2.986460e+07,64.037209,9.635783e+03,97.248527


In [None]:
# Talk to Zach
# Focus also on writing
