## Data Ingestion

- Currently using only Word and WordNet Extension columns
- Aggregating & grouping details into json for easy use

In [90]:
import pandas as pd
import json
import numpy as np

In [91]:
df = pd.read_csv('dict4dataset_06_23_22.csv')

In [94]:
df[df['Word']=='because']

Unnamed: 0,Strategy No.,Strategy Name,Category No.,Category Name,Semantic Anchor,Word,WordNet Extension,Phrase Extension,Reprogramming,ReprogramType,Popup_title,popup_feedback,Sidebar_feedback,Color(hex)
129,L2f,Understanding Healthy Patterns,2.0,Coherent narrative words,Coherent narrative words,because,,,,,Coherent narrative words,You will benefit from using these words to con...,<h3>More About Insight Words and Coherent Narr...,#44aa38


In [95]:
df_clean = df.dropna(subset=['Strategy No.'])
df_clean = df_clean[df_clean['Strategy No.'].str.contains('L2')]
df_clean = df_clean.replace({"Word": "/", "WordNet Extension": "/", "Phrase Extension": "/"}, np.nan) \
             .dropna(subset=['Category No.']) \
             .astype({'Category No.': 'int32'}) \
             .sort_values(["Strategy No."])

# for wordnet extension split at comma
df_clean['WordNet Extension'] = df_clean['WordNet Extension'].apply(lambda x: x.split(',') if isinstance(x,str) else x)
df_clean['Phrase Extension'] = df_clean['Phrase Extension'].apply(lambda x: x.split(',') if isinstance(x,str) else x)

# for reprogramming split at ;
df_clean['Reprogramming'] = df_clean['Reprogramming'].apply(lambda x: x.split(';') if isinstance(x,str) else x)

df_minimal = df_clean[["Word", "WordNet Extension", "Phrase Extension", "Reprogramming", "ReprogramType", "Popup_title", "popup_feedback", "Sidebar_feedback", "Color(hex)"]]

In [97]:
df_minimal[df_minimal['Word']=='because']

Unnamed: 0,Word,WordNet Extension,Phrase Extension,Reprogramming,ReprogramType,Popup_title,popup_feedback,Sidebar_feedback,Color(hex)
129,because,,,,,Coherent narrative words,You will benefit from using these words to con...,<h3>More About Insight Words and Coherent Narr...,#44aa38


In [98]:
dict_minimal = df_minimal.rename(columns={
    "WordNet Extension":"wordnet_ext", 
    "Phrase Extension":"phrase_ext",
    "Reprogramming":"rewrite",
    "ReprogramType":"rewrite_position",
    "Color(hex)":"color"}).to_dict(orient="records")
dict_minimal

[{'Word': 'altruistic',
  'wordnet_ext': nan,
  'phrase_ext': nan,
  'rewrite': ['Without this judgment, I will',
   ' Beyond judgment, the situation and my feelings can be described as:'],
  'rewrite_position': 'After',
  'Popup_title': 'Judgment',
  'popup_feedback': 'Nothing special. We make judgments all the time! Just be aware of the unproductive negative ones.',
  'Sidebar_feedback': '<h3>More About Judgments</h3>\n<p>Maybe you look out the window, see rain and wind, and make the judgment that the weather is bad. These negative judgments aren\'t right or wrong, but they are often unconscious and often unproductive. Negative judgments often make us feel bad and make us less effective at managing the situation.</p>\n<ul>\n<li>Yes, I can have judgments!</li>\n<li>Does the judgment make me feel bad?</li>\n<li>Take a step back, write down the situation, the resulting emotion, and the outcome of the judgment</li>\n<li>Yeah, I become more aware of how unwanted negative judgments are bec

In [64]:
# df_clean.columns

Index(['Strategy No.', 'Strategy Name', 'Category No.', 'Category Name',
       'Semantic Anchor', 'Word', 'WordNet Extension', 'Phrase Extension',
       'Reprogramming', 'ReprogramType', 'Popup_title', 'popup_feedback',
       'Sidebar_feedback', 'Color(hex)'],
      dtype='object')

In [65]:
groupped = df_clean.groupby(["Strategy No.", "Category No.", "Category Name", "Semantic Anchor"], dropna=False) \
                   .apply(lambda x: x[["Word","WordNet Extension","Phrase Extension", "Reprogramming", "ReprogramType", "Popup_title", "popup_feedback","Sidebar_feedback", "Color(hex)"]].to_json(orient='columns'))

In [66]:
isinstance(groupped, pd.Series)

True

In [67]:
# groupped.groups
# groupped.index
for index, value in groupped.items():
    print(f"Index : {index}, Value : {json.loads(value)}")

Index : ('L2a', 1, 'Positive Adjectives', 'Judgment'), Value : {'Word': {'43': 'altruistic', '45': 'high-rpincipled', '46': 'right', '47': 'fair', '48': 'perfect', '49': 'wonderful', '50': 'confident', '44': 'diligent', '42': 'brave'}, 'WordNet Extension': {'43': None, '45': None, '46': None, '47': None, '48': None, '49': None, '50': None, '44': None, '42': None}, 'Phrase Extension': {'43': None, '45': None, '46': None, '47': None, '48': None, '49': None, '50': None, '44': None, '42': None}, 'Reprogramming': {'43': None, '45': None, '46': None, '47': None, '48': None, '49': None, '50': None, '44': None, '42': ['Without this judgment, I will', ' Beyond judgment, the situation and my feelings can be described as:']}, 'ReprogramType': {'43': 'After', '45': 'After', '46': 'After', '47': 'After', '48': 'After', '49': 'After', '50': 'After', '44': 'After', '42': 'After'}, 'Popup_title': {'43': 'Judgment', '45': 'Judgment', '46': 'Judgment', '47': 'Judgment', '48': 'Judgment', '49': 'Judgment

In [68]:
agg = []

for index, value in groupped.items():
    out = { 
        "strategy_code" : index[0],
        "category_number" : index[1],
        "semantic_anchor" : index[2],
        "words" : [i for i in list(json.loads(value)["Word"].values()) if i],
        "wordnet_ext" : [i for i in list(json.loads(value)["WordNet Extension"].values()) if i],
        "phrase_ext" : [i for i in list(json.loads(value)["Phrase Extension"].values()) if i],
        "rewrite" : list(json.loads(value)["Reprogramming"].values())[0],
        "rewrite_position" : list(json.loads(value)["ReprogramType"].values())[0],
        # "brief_feedback" : [*json.loads(value)["Brief Feedback"].values()][0],
        # "longer_feedback" : [*json.loads(value)["Longer Feedback"].values()][0],
        # "color" : [*json.loads(value)["Color"].values()][0],
        "popup_title" : [*json.loads(value)["Popup_title"].values()][0],
        "popup_feedback" : [*json.loads(value)["popup_feedback"].values()][0],
        "Sidebar_feedback" : [*json.loads(value)["Sidebar_feedback"].values()][0],
        "color" : [*json.loads(value)["Color(hex)"].values()][0],
    }
    out["wordnet_ext"] = [item.strip() for sublist in out["wordnet_ext"] for item in sublist]
    out["phrase_ext"] = [item.strip() for sublist in out["phrase_ext"] for item in sublist]
    agg.append(out)
    
# print(agg)
# print(json.loads(agg))

## FOR SAVING:

In [69]:
import datetime
ts = datetime.datetime.now().isoformat()

In [70]:
with open(ts+'dict.json', 'w', encoding='utf-8') as f:
    json.dump(agg, f, ensure_ascii=False, indent=4)

In [1]:
df

NameError: name 'df' is not defined

In [71]:

# filter for "Strategy No." starting with "L1" 
df_l1 = df.dropna(subset=['Strategy No.'])
df_l1 = df_l1[df_l1['Strategy No.'].str.contains('L1')]
# df_l1


In [72]:
df_l1_clean = df_l1[["Word", "Reprogramming", "ReprogramType"]]
df_l1_clean = df_l1_clean.dropna(subset="Reprogramming")

In [73]:
l1_dict = df_l1_clean.to_dict(orient="records")

In [74]:
for obj in l1_dict:
    obj['Reprogramming'] = obj['Reprogramming'].split(";")

In [75]:
l1_dict

[{'Word': 'loser',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'suck',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'hate',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'lazy',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'the worst',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'useless',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'failure',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'pathetic',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'good-for-nothing',
  'Reprogramming': ['My inner value behind these is'],
  'ReprogramType': 'end'},
 {'Word': 'dumb',
  'Reprogramming': ['My inner value behind these is'],

In [32]:
with open("l1_dict.json", "w") as f:
    json.dump(l1_dict, f)