In [None]:
import itertools
import os
from collections import defaultdict
from glob import glob

import numpy as np
import pandas as pd

In [None]:
# Names searched for in the page text; list position doubles as the
# row/column index of the co-occurrence matrix.
protagonists = [
    'ปิติ',
    'มานะ',
    'มานี',
    'ชูใจ',
    'วีระ',
    'เพชร',
    'กล้า',
    'สีเทา',
    'โต',
    'ไพลิน',
]
# Reverse lookup: name -> position in `protagonists`.
protagonists_dict = dict(zip(protagonists, range(len(protagonists))))

def read_book(file_path, page_start='ป.4 เล่ม 1'):
    """
    Read a book text file and split it into pages.

    Blank lines are dropped and the remaining lines are stripped of
    surrounding whitespace. A new page begins at every line (after the
    first) that contains ``page_start``; the first non-blank line always
    opens the first page, whether or not it contains the marker.

    Parameters
    ----------
    file_path : str
        Path of the text file to read. The file is decoded with the
        platform default encoding, as before.
    page_start : str
        Marker text whose presence in a line signals the start of a page.

    Returns
    -------
    list of list of str
        One inner list of stripped lines per page, in file order. An empty
        list is returned when the file has no non-blank lines.
    """
    # `with` guarantees the handle is closed even if reading fails.
    with open(file_path) as f:
        lines = [line.strip() for line in f if line.strip()]
    if not lines:
        return []
    pages = []
    page = [lines[0]]
    for line in lines[1:]:
        if page_start in line:
            pages.append(page)
            page = []
        page.append(line)
    # Flush the page still being collected; without this the last page of
    # the book is silently dropped.
    pages.append(page)
    return pages

def count_characters(pages):
    """
    Count, for each protagonist, the number of pages that mention them.

    A protagonist counts as mentioned when their name occurs as a substring
    of the page's lines joined with single spaces.

    Parameters
    ----------
    pages : list of list of str
        Pages, each given as a list of text lines.

    Returns
    -------
    (defaultdict, list of list of str)
        The per-name page counts, and, for every page mentioning at least
        two protagonists, the list of names found on that page (in
        ``protagonists`` order).
    """
    page_counts = defaultdict(int)
    shared_pages = []
    for page in pages:
        text = ' '.join(page)
        present = [name for name in protagonists if name in text]
        for name in present:
            page_counts[name] += 1
        # Only pages with two or more names can contribute a pairing.
        if len(present) > 1:
            shared_pages.append(present)
    return page_counts, shared_pages

def create_network_matrix(characters_all_pages):
    """
    Build the protagonist co-occurrence matrix.

    Parameters
    ----------
    characters_all_pages : list of list of str
        For each page, the protagonist names found together on it.

    Returns
    -------
    pandas.DataFrame
        Integer frame indexed and labelled by ``protagonists``. Every
        ordered pair taken from a page's list adds one to cell
        (first, second), so each pairing is recorded in both directions.
    """
    size = len(protagonists)
    counts = np.zeros((size, size))
    for present in characters_all_pages:
        for first, second in itertools.permutations(present, 2):
            row = protagonists_dict[first]
            col = protagonists_dict[second]
            counts[row, col] += 1
    frame = pd.DataFrame(counts, index=protagonists, columns=protagonists)
    return frame.astype(int)

In [None]:
# Page-start marker text for each book file; each entry is later passed to
# read_book() as `page_start` for the file it is paired with.
headers = [
    'ปอ 1 เล่ม 1',
    'ปอ 1 เล่ม 2',
    'ป.๒',
    'ป.๒',
    'ป. ๓ เล่ม ๑',
    'ป.3',
    'ป.4 เล่ม 1',
    'ป.๔ เล่ม ๒',
    'ป.๕ เล่ม ๑',
    'ป.๕ เล่ม ๒',
    'ป.๖ เล่ม',
    'ป.6 ล.2',
]
# Files are paired with markers by sorted filename order; zip() stops at the
# shorter of the two sequences.
book_paths = sorted(glob('books/*.txt'))
path_tuple = list(zip(book_paths, headers))

In [None]:
# Row labels for the per-book plots: one title for every (i, j) with
# i in 1..6 and j in 1..2, ordered by i then j.
book_titles = [
    'ป.%s เล่ม %s' % (str(grade), str(volume))
    for grade in range(1, 7)
    for volume in range(1, 3)
]

# Axis labels for the network plot; same order as `protagonists`, except the
# last entry is a longer form of the last name.
protagonists_ = ['ปิติ', 'มานะ', 'มานี', 'ชูใจ', 'วีระ',
                 'เพชร', 'กล้า', 'สีเทา', 'โต', 'ครูไพลิน']

## **Calculating occurrence**

In [None]:
# For every (file, marker) pair: split the book into pages, count on how many
# pages each protagonist appears, and build the co-occurrence matrix.
social, pages_list = [], []
for file_path, line_header in path_tuple:
    pages = read_book(file_path, line_header)
    pages_list.append(pages)
    d, characters_all_pages = count_characters(pages)
    d = dict(d)  # plain dict so pandas treats it as an ordinary record
    W = create_network_matrix(characters_all_pages)
    # Entry layout: [path, marker, page counts, co-occurring names, matrix]
    social.append([file_path, line_header, d, characters_all_pages, W])

# One row per book, one column per protagonist; names absent from a book
# become 0.
occurrence_records = [entry[2] for entry in social]
O_df = pd.DataFrame(occurrence_records).fillna(0).astype(int)

In [None]:
from lightning import Lightning
lgn = Lightning(local=True, ipython=True)

# Heat map of per-book page counts: rows are books, columns are protagonists.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
viz = lgn.matrix(O_df.values, colormap='Reds',
                 column_labels=list(O_df.columns),
                 row_labels=book_titles,
                 numbers=True, width=500, height=700)
viz

## **Example of social network**

In [None]:
# Co-occurrence matrix of the entry at index 11 of `social`, i.e. the twelfth
# (file, marker) pair, which is matched with headers[11].
# NOTE(review): the earlier comment here said "ป. 5 เล่ม 1", which is index 8 in
# both `headers` and `book_titles` - confirm which book is intended.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
viz = lgn.matrix(social[11][-1].values, colormap='Reds',
                 row_labels=protagonists_,
                 column_labels=protagonists_,
                 numbers=True, width=500, height=500)
viz

## Issue of the HTML file

What keeps it from rendering properly inside `{% include .html %}` is this part:

```
...<div id=A5YE3C47OE data-type="matrix"...
```

The value of the `id` attribute is not wrapped in double quotes (`"`).

## What to do

Find the `id` whose value is not quoted and wrap that value in double quotes; that's it.

In [None]:
import re

# Only the first un-quoted id attribute is rewritten; the generated HTML is
# expected to contain exactly one. If the output ever looks wrong, check here.

def quoteID(baseString):
    """
    Wrap the value of the first un-quoted ``id`` attribute in double quotes.

    Parameters
    ----------
    baseString : str
        HTML text, e.g. ``<div id=A5YE3C47OE data-type="matrix">``.

    Returns
    -------
    str
        The same text with ``id=VALUE`` rewritten as ``id="VALUE"``. Other
        occurrences of VALUE elsewhere in the text are left untouched, and
        the input is returned unchanged when no un-quoted id is present
        (for instance when the id is already quoted).
    """
    # (?<![\w-]) stops "id=" from matching inside names such as "grid=" or
    # "data-id="; \w+ cannot start at a quote, so quoted ids are skipped.
    return re.sub(r'(?<![\w-])id=(\w+)', r'id="\1"', baseString, count=1)

## Suggestion

- wrap this as a module :p

In [None]:
# Export the current `viz` as an HTML fragment plus a separate JS file that
# the fragment loads through a <script> tag.
js = viz.load_embed()
base = viz._html

# Name of the current working directory, used as the post folder in the
# script URL. Pure Python instead of `!basename $(pwd)`, so it also works
# without a POSIX shell and with paths that contain spaces.
postName = os.path.basename(os.getcwd())

plotName = 'plots/example_plot'

# Make sure the output folder exists before writing into it.
plotDir = os.path.dirname(plotName)
if plotDir and not os.path.isdir(plotDir):
    os.makedirs(plotDir)

# use this quotedBase instead of unquoted
quotedBase = quoteID(base)

# append ref to js into html first
# NOTE(review): confirm that rawgit.com still serves this URL.
quotedBase += '\n<script src="https://rawgit.com/tupleblog/tuple_code/master/{0}/{1}.js"></script>'.format(postName, plotName)

with open("./{0}.html".format(plotName), "wb") as f:
    f.write(quotedBase.encode('utf-8'))

with open("./{0}.js".format(plotName), "wb") as f:
    f.write(js.encode('utf-8'))