In [18]:
import re
import os
import sys
import sqlite3
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
import plotly.graph_objects as go
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
 
    return None
def query(conn, sql):
    conn.row_factory = dict_factory
    cur = conn.cursor()
    cur.execute(sql)
    results = cur.fetchall()
    return pd.DataFrame(results)
def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d
conn = create_connection("../bible/nasb.db")
bible = {"genesis":50,"exodus":40,"leviticus":27, "numbers":36,"deuteronomy":34, "joshua":24, "judges":21, "ruth":4, "1-samuel":31, "2-samuel":24,
        "1-kings":22, "2-kings":25, "1-chronicles":29, "2-chronicles":36, "ezra":10, "nehemiah":13, "esther":10, "job":42, "psalms":150,
        "proverbs":31, "ecclesiastes":12, "song-of-solomon":8, "isaiah":66, "jeremiah":52, "lamentations":5, "ezekiel":48, "daniel":12,
        "hosea":14, "joel":3, "amos":9, "obadiah":1, "jonah":4, "micah":7, "nahum":3, "habakkuk":3, "zephaniah":3, "haggai":2,"zechariah":14,
        "malachi":4, "matthew":28, "mark":16, "luke":24, "john":21, "acts":28, "romans":16, "1-corinthians":16, "2-corithians":13, "galatians":6,
        "ephesians":6, "philippians":4, "colossians":4, "1-thessalonians":5, "2-thessalonians":3, "1-timothy":6, "2-timothy":4,
        "titus":3, "philemon":1, "hebrews":13, "james":5, "1-peter":5, "2-peter":3, "1-john":5, "2-john":1, "3-john":1, "jude":1, "revelation":22}

In [6]:
r = query(conn, '''select * from refs''')

In [26]:

def get_ref_edges(row):
    for con in row.refs.split(','):
        if con != '':
            all_edges.append([row.ref,con])

In [34]:
all_edges = []
x = r.apply(get_ref_edges, axis=1)
all_edges = pd.DataFrame(all_edges, columns = ['ref','edge'])
#all_edges = all_edges.set_index('ref')

all_edges["book"] = all_edges.ref.apply(lambda x: x.split(' ')[0])
all_edges["chapter"] = all_edges.ref.apply(lambda x: x.split(' ')[1].split(':')[0])

In [37]:
all_edges["edge_book"] = all_edges.edge.apply(lambda x: x.split(' ')[0])
all_edges["edge_chapter"] = all_edges.edge.apply(lambda x: x.split(' ')[1].split(':')[0])

In [43]:
len(all_edges[(all_edges.edge_book == 'genesis')&(all_edges.book=='exodus')])

343

In [44]:
book_edges = []
for book in bible.keys():
    for edge_book in bible.keys():
        book_edges.append({'book1': book, 'book2':edge_book, 'num_conn': len(all_edges[(all_edges.edge_book == book)&(all_edges.book==edge_book)])})

In [48]:
connection_books = pd.DataFrame(book_edges)
connection_books = connection_books[connection_books.book1!=connection_books.book2]

In [51]:
connection_books.to_excel('book_connections.xlsx')

In [5]:
connections = pd.read_csv('book_connections.csv')

In [26]:
connections.head()

Unnamed: 0.1,Unnamed: 0,book1,book2,num_conn
0,1,genesis,exodus,343
1,2,genesis,leviticus,130
2,3,genesis,numbers,233
3,4,genesis,deuteronomy,237
4,5,genesis,joshua,121


In [27]:
nodes = connections.book1.unique()


edge = connections[connections.num_conn>50].apply(lambda row: [str(row.book1), row.book2],axis=1)

edges = []
for e in edge:
    edges.append(e)

In [28]:
import networkx as nx
G = nx.Graph()
G.add_edges_from(edges)
G.add_nodes_from(nodes)

In [29]:
pos=nx.spring_layout(G)

In [49]:
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]][0],  pos[edge[0]][1]
    x1, y1 = pos[edge[1]][0],  pos[edge[1]][1]
    edge_x.append(x0)
    edge_x.append(x1)
    edge_x.append(None)
    edge_y.append(y0)
    edge_y.append(y1)
    edge_y.append(None)

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

node_x = []
node_y = []
for node in pos.keys():
    node_x.append(pos[node][0])
    node_y.append(pos[node][1])

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))
node_adjacencies = []
node_text = []
x = 0
for node, adjacencies in enumerate(G.adjacency()):
    node_adjacencies.append(len(adjacencies[1]))
    node_text.append(list(bible.keys())[x] + ': '+str(len(adjacencies[1])))
    x+=1

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [50]:
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title='<br>Network graph made with Python',
                titlefont_size=16,
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> https://plot.ly/ipython-notebooks/network-graphs/</a>",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )
fig.show()