In [1411]:
import math
import numpy as np
import pandas as pd

import re
import json
from tqdm import tqdm
from collections import defaultdict

import plotly.graph_objects as go

In [1412]:
# Notebook-wide pandas settings: cap the number of rows shown when a frame is
# displayed and turn off the chained-assignment warning.
pd.set_option("display.max_rows", 50)
pd.set_option("mode.chained_assignment", None)

### Data Preparation

#### Data: KJV Bible

In [1413]:
# Load the cleaned KJV table and add a "<book> <chapter>:<verse>" reference column.
df_kjv = pd.read_csv("../data/kjv_clean.csv")
df_kjv['verse'] = (
    df_kjv['book_name']
    .str.cat(df_kjv['chapter_number'].astype(str), sep=' ')
    .str.cat(df_kjv['verse_number'].astype(str), sep=':')
)

In [1414]:
# Build {book_name: {chapter_number: [verse_number, ...]}} in file order.
# The three columns are walked in lockstep instead of using
# DataFrame.iterrows(), which constructs a Series object for every row.
dict_kjv = defaultdict(lambda: defaultdict(list))

for book, chapter, verse_number in zip(
    df_kjv["book_name"].tolist(),
    df_kjv["chapter_number"].tolist(),
    df_kjv["verse_number"].tolist(),
):
    dict_kjv[book][chapter].append(verse_number)

# Convert to plain dicts so that looking up an unknown book or chapter raises
# KeyError instead of silently inserting an empty entry.
dict_kjv = {book: dict(chapters) for book, chapters in dict_kjv.items()}

In [1415]:
# Sanity check: prints the entire book -> chapter -> verse-number mapping.
print(dict_kjv)

{'Genesis': {1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], 2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], 3: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], 4: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26], 5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], 6: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], 7: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], 8: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], 9: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], 10: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,

In [1416]:
# Book names grouped by testament: 39 Old Testament books followed by
# 27 New Testament books, in the order listed here.
old_testament = [
    "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth",
    "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles",
    "Ezra", "Nehemiah", "Esther", "Job", "Psalms", "Proverbs", "Ecclesiastes", "Song of Solomon",
    "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel",
    "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah",
    "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi",
]

new_testament = [
    "Matthew", "Mark", "Luke", "John", "Acts",
    "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians",
    "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon",
    "Hebrews", "James", "1 Peter", "2 Peter", "1 John", "2 John", "3 John", "Jude", "Revelation",
]

# Full list: Old Testament first, then New Testament.
books_kjv = [*old_testament, *new_testament]

#### Data: Bible Events

In [1417]:
# Load the events table; dtype=str makes pandas read every column as text.
df_events = pd.read_csv("../data/events.csv", dtype=str)

In [1418]:
# Keep only the four columns used in the rest of the notebook.
df_events = df_events.loc[:, ["title", "startDate", "duration", "verses"]]

In [1419]:
def sort_verses(verse_str):
    """Reorder a comma-separated reference string by its numeric fields.

    Each reference is split on "."; every field after the first is converted
    to int and the resulting tuple is the sort key. The first field is not
    part of the key, so references with equal numeric fields keep their
    original relative order. Returns the references re-joined with commas.
    """
    def numeric_key(reference):
        return tuple(int(field) for field in reference.split(".")[1:])

    references = verse_str.split(",")
    references.sort(key=numeric_key)
    return ",".join(references)

# Normalise the order of the references inside every event's verse string.
df_events['verses'] = df_events['verses'].map(sort_verses)

In [1420]:
# Upper-case book abbreviation -> full book name, in the same order as the
# original listing (Genesis ... Revelation).
kjv_books_abv = {
    "GEN": "Genesis",
    "EXOD": "Exodus",
    "LEV": "Leviticus",
    "NUM": "Numbers",
    "DEUT": "Deuteronomy",
    "JOSH": "Joshua",
    "JUDG": "Judges",
    "RUT": "Ruth",
    "1SAM": "1 Samuel",
    "2SAM": "2 Samuel",
    "1KGS": "1 Kings",
    "2KGS": "2 Kings",
    "1CHR": "1 Chronicles",
    "2CHR": "2 Chronicles",
    "EZR": "Ezra",
    "NEH": "Nehemiah",
    "EST": "Esther",
    "JOB": "Job",
    "PSA": "Psalms",
    "PRO": "Proverbs",
    "ECC": "Ecclesiastes",
    "SNG": "Song of Solomon",
    "ISA": "Isaiah",
    "JER": "Jeremiah",
    "LAM": "Lamentations",
    "EZEK": "Ezekiel",
    "DAN": "Daniel",
    "HOS": "Hosea",
    "JOEL": "Joel",
    "AMOS": "Amos",
    "OBAD": "Obadiah",
    "JONAH": "Jonah",
    "MIC": "Micah",
    "NAH": "Nahum",
    "HAB": "Habakkuk",
    "ZEPH": "Zephaniah",
    "HAG": "Haggai",
    "ZECH": "Zechariah",
    "MAL": "Malachi",
    "MATT": "Matthew",
    "MARK": "Mark",
    "LUKE": "Luke",
    "JOHN": "John",
    "ACTS": "Acts",
    "ROM": "Romans",
    "1CO": "1 Corinthians",
    "2CO": "2 Corinthians",
    "GAL": "Galatians",
    "EPH": "Ephesians",
    "PHP": "Philippians",
    "COL": "Colossians",
    "1TH": "1 Thessalonians",
    "2TH": "2 Thessalonians",
    "1TI": "1 Timothy",
    "2TI": "2 Timothy",
    "TIT": "Titus",
    "PHM": "Philemon",
    "HEB": "Hebrews",
    "JAS": "James",
    "1PE": "1 Peter",
    "2PE": "2 Peter",
    "1JN": "1 John",
    "2JN": "2 John",
    "3JN": "3 John",
    "JUD": "Jude",
    "REV": "Revelation",
}

In [1421]:
def format_verses(verses_str, book_names=None):
    """Convert "abbr.chapter.verse" references into "Book chapter:verse" strings.

    Parameters
    ----------
    verses_str : str
        Comma-separated references such as "gen.1.1,gen.1.2".
    book_names : mapping, optional
        Upper-case abbreviation -> full book name. Defaults to the
        module-level ``kjv_books_abv`` table.

    Returns
    -------
    list of str
        One formatted reference per input reference that has exactly three
        dot-separated parts; references with any other shape are skipped.

    Raises
    ------
    KeyError
        If a three-part reference uses an abbreviation missing from the table.
    """
    if book_names is None:
        book_names = kjv_books_abv

    formatted_verses = []
    for reference in verses_str.split(','):
        # Strip each part so "gen.1.1, gen.1.2" does not produce the key " GEN".
        parts = [part.strip() for part in reference.split('.')]
        if len(parts) != 3:
            continue
        book_abv, chapter, verse_num = parts
        formatted_verses.append(f"{book_names[book_abv.upper()]} {chapter}:{verse_num}")

    return formatted_verses

In [1422]:
# Replace the raw reference string with a list of formatted references.
df_events["verses_ls"] = df_events["verses"].map(format_verses)
df_events = df_events.drop(columns=['verses'])

In [1423]:
# Lookup tables keyed by the "Book chapter:verse" string:
# the verse's row label in df_kjv, and the verse's text.
verse_to_index = dict(zip(df_kjv['verse'], df_kjv.index))
verse_to_text  = dict(zip(df_kjv['verse'], df_kjv['verse_text']))

In [1424]:
def group_verses(verses, index=None):
    """Split a list of verse references into runs of consecutive verses.

    Two neighbouring references belong to the same run when both are present
    in the position lookup and the second one's position is exactly one more
    than the first one's.

    Parameters
    ----------
    verses : list of str
        Verse references, in the order they should be grouped.
    index : mapping, optional
        Reference -> integer position. Defaults to the module-level
        ``verse_to_index`` table.

    Returns
    -------
    list of list of str
        The runs, in input order. An empty input gives an empty list.
    """
    if index is None:
        index = verse_to_index

    groups = []
    current_group = []
    prev_pos = None
    for v in verses:
        pos = index.get(v)
        # A reference that is missing from the lookup is never adjacent to
        # anything. (Numeric sentinels such as -1 / -2 would make two unknown
        # references look consecutive, because -1 == -2 + 1.)
        is_next = (
            bool(current_group)
            and pos is not None
            and prev_pos is not None
            and pos == prev_pos + 1
        )
        if is_next:
            current_group.append(v)
        else:
            if current_group:
                groups.append(current_group)
            current_group = [v]
        prev_pos = pos
    if current_group:
        groups.append(current_group)
    return groups

In [1425]:
def group_texts(verse_groups):
    """
    Look up the text of every verse in every group.

    Takes a list of verse groups (each a list of verse references) and returns
    a list of the same shape in which each reference is replaced by its entry
    in `verse_to_text`.
    """
    return [[verse_to_text[ref] for ref in refs] for refs in verse_groups]

In [1426]:
# Split each event's reference list into runs of consecutive verses.
df_events['verses_lss'] = df_events['verses_ls'].map(group_verses)

In [1427]:
# Attach the verse texts for every run, then let the grouped references take
# over the 'verses_ls' column name from the flat list.
df_events['texts_ls'] = df_events['verses_lss'].map(group_texts)
df_events = (
    df_events
    .drop(columns=['verses_ls'])
    .rename(columns={'verses_lss': 'verses_ls'})
)

In [1428]:
def process_start_date(value):
    """Extract the (possibly negative) year from a start-date value.

    Accepts a bare year ("-4004", "30", 30) or a year followed by
    "-<rest>" ("0030-04-05", "-0005-01-01"). The optional sign belongs to the
    year, so a leading "-" is never treated as a field separator.

    Returns
    -------
    int
        The year.

    Raises
    ------
    ValueError
        If the value does not start with an integer year in one of the
        shapes above.
    """
    text = str(value).strip()
    # Optional sign + digits, optionally followed by "-<anything>".
    match = re.fullmatch(r'([+-]?\d+)(?:-.*)?', text)
    if match is None:
        raise ValueError(f"Unrecognised start date: {value!r}")
    return int(match.group(1))

# Derive the integer 'year' column and discard the raw start date.
df_events['year'] = df_events['startDate'].map(process_start_date)
df_events = df_events.drop(columns=['startDate'])

In [1429]:
def get_largest_duration(duration):
    """Return the "<number><unit>" component with the largest unit.

    Scans `duration` for number+unit pairs where the unit is one of Y, M, W
    or D, and returns the first pair found for the highest-ranked unit
    (Y before M before W before D). Returns None when no pair is found.
    """
    first_by_unit = {}
    for amount, unit in re.findall(r'(\d*\.?\d+)([YMWD])', duration):
        # Keep only the first occurrence of each unit.
        first_by_unit.setdefault(unit, amount)

    for unit in ('Y', 'M', 'W', 'D'):
        if unit in first_by_unit:
            return f"{first_by_unit[unit]}{unit}"
    return None

# Reduce every duration to its single largest-unit component.
df_events['duration'] = df_events['duration'].map(get_largest_duration)

### Timeline Chart

#### Before Christ

In [1430]:
# Events with year <= 0, i.e. the ones charted in this "Before Christ" section
# (note the `_ad` suffix). An explicit copy is taken because new columns are
# assigned to this frame below; that should not depend on the chained-assignment
# warning being silenced globally.
# NOTE(review): rows with year == 0 also satisfy the `>= 0` filter used for
# df_events_ce later in the notebook — confirm the overlap is intended.
df_events_ad = df_events[df_events['year'] <= 0].copy()

In [1431]:
def convert_duration_to_years(duration_str, min_years=25):
    """Return the bar length, in whole years, for a duration string.

    Parameters
    ----------
    duration_str : str or None
        A "<number><unit>" string such as "120Y" or "3M". `None` (which
        `get_largest_duration` returns when it finds no duration) and other
        non-string values are accepted.
    min_years : int, default 25
        Floor applied to every result.

    Returns
    -------
    int
        For a year duration, the number truncated to an int, but never less
        than `min_years`. For any other unit, or a missing duration,
        `min_years`.
    """
    if isinstance(duration_str, str) and duration_str.endswith('Y'):
        return max(int(float(duration_str[:-1])), min_years)
    return min_years

In [1432]:
# Add the numeric columns used by the chart: integer start year, bar length
# in years, and the resulting end year.
df_events_ad = df_events_ad.assign(
    year=lambda frame: frame['year'].astype(int),
    duration_years=lambda frame: frame['duration'].apply(convert_duration_to_years),
    endYear=lambda frame: frame['year'] + frame['duration_years'],
)


In [1433]:
def build_hover_text(row):
    """Build the HTML hover label for one event row.

    Reads 'title', 'year', 'duration', 'verses_ls' and 'texts_ls' from `row`.
    The label lists the title, year and duration, then one line per verse
    group: "first - last" for a group of several verses, or the single
    reference otherwise. Every line ends with "<br>" and the label is closed
    with "<extra></extra>".
    """
    lines = [
        f"<b>Title:</b> {row['title']}",
        f"<b>Year:</b> {row['year']}",
        f"<b>Duration:</b> {row['duration']}",
        "<b>Verses:</b>",
    ]

    # The texts are paired with the groups but are not shown in the label.
    for verses, _ in zip(row['verses_ls'], row['texts_ls']):
        if len(verses) > 1:
            lines.append(f"{verses[0]} - {verses[-1]}")
        else:
            lines.append(f"{verses[0]}")

    return "".join(f"{line}<br>" for line in lines) + "<extra></extra>"

In [1434]:
# Attach the hover label to every event, then order the events by start year.
df_events_ad = df_events_ad.assign(
    hover_text=lambda frame: frame.apply(build_hover_text, axis=1)
)
df_events_ad_sorted = df_events_ad.sort_values('year')

In [1435]:
# Ten evenly spaced positions between the earliest start year and the latest
# end year, computed once and reused for the tick values and their labels.
_ad_tick_positions = np.linspace(
    df_events_ad_sorted['year'].min(),
    df_events_ad_sorted['endYear'].max(),
    10,
)

# Horizontal bar timeline: one bar per event, starting at its year and
# extending for `duration_years`, coloured by duration.
fig_ad = go.Figure(
    data=[
        go.Bar(
            x=df_events_ad_sorted['duration_years'],
            y=df_events_ad_sorted['title'],
            base=df_events_ad_sorted['year'],
            orientation='h',
            marker=dict(
                color=df_events_ad_sorted['duration_years'],
                colorscale="Reds",
                showscale=True,
                # The colour-bar title font is set through `title.font`;
                # the flat `titlefont` attribute is deprecated in Plotly.
                colorbar=dict(
                    title=dict(
                        text="Duration (Years)",
                        font=dict(color="#fafafa"),
                    ),
                    tickfont=dict(color="#fafafa"),
                ),
                cmin=0,
                cmax=750
            ),
            # The whole label (including <extra></extra>) comes from hover_text.
            hovertemplate='%{customdata}',
            customdata=df_events_ad_sorted['hover_text']
        )
    ],
    layout=go.Layout(
        xaxis_title="Approximate Year",
        yaxis_title="Event",
        xaxis=dict(
            # NOTE(review): Plotly documents tickvals/ticktext as taking effect
            # only with tickmode='array' — confirm whether 'auto' is intended.
            tickmode='auto',
            tickvals=_ad_tick_positions,
            ticktext=[f"{int(x)}" for x in _ad_tick_positions],
            color="#fafafa",
        ),
        yaxis=dict(
            color="#fafafa",
            tickfont=dict(color="#fafafa")
        ),
        hoverlabel=dict(
            bgcolor="#0f1116",
            font=dict(color="white"),
            align='left'
        ),
        height=2500,
        width=1250,
        # Fully transparent backgrounds.
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
)

In [1446]:
# fig_ad.show()

In [1449]:
# fig_ad.write_json("../models/figure_timeline_ad.json")

#### Anno Domini

In [1438]:
# Events with year >= 0, charted in this "Anno Domini" section. An explicit
# copy is taken because new columns are assigned to this frame below; that
# should not depend on the chained-assignment warning being silenced globally.
# NOTE(review): rows with year == 0 also satisfy the `<= 0` filter used for
# df_events_ad earlier in the notebook — confirm the overlap is intended.
df_events_ce = df_events[df_events['year'] >= 0].copy()

In [1439]:
def convert_duration_to_years(duration_str):
    """Convert a "<number><unit>" duration into whole years, rounded up.

    The unit is the last character (Y, M, W or D) and is converted using
    1, 1/12, 1/52 and 1/365 years respectively. This definition replaces the
    function of the same name defined earlier in the notebook.
    """
    years_per_unit = {'Y': 1, 'M': 1/12, 'W': 1/52, 'D': 1/365}
    unit = duration_str[-1]
    amount = float(duration_str[:-1])
    return math.ceil(amount * years_per_unit[unit])

In [1440]:
# Add the numeric columns used by the chart: integer start year, duration in
# whole years, and the resulting end year.
df_events_ce = df_events_ce.assign(
    year=lambda frame: frame['year'].astype(int),
    duration_years=lambda frame: frame['duration'].apply(convert_duration_to_years),
    endYear=lambda frame: frame['year'] + frame['duration_years'],
)

In [1441]:
def build_hover_text(row):
    """Build the HTML hover label for one event row.

    Reads 'title', 'year', 'duration', 'verses_ls' and 'texts_ls' from `row`
    and returns header lines followed by one line per verse group, each ending
    in "<br>", closed with "<extra></extra>". This repeats the definition of
    the same name given earlier in the notebook.
    """
    def describe(verses):
        # "first - last" for a run of verses, the bare reference for a single one.
        return f"{verses[0]} - {verses[-1]}" if len(verses) > 1 else verses[0]

    header = (
        f"<b>Title:</b> {row['title']}<br>"
        f"<b>Year:</b> {row['year']}<br>"
        f"<b>Duration:</b> {row['duration']}<br>"
        "<b>Verses:</b><br>"
    )
    # The texts are paired with the groups but are not shown in the label.
    body = "".join(
        f"{describe(verses)}<br>"
        for verses, _ in zip(row['verses_ls'], row['texts_ls'])
    )
    return header + body + "<extra></extra>"

In [1442]:
# Attach the hover label to every event, then order the events by start year.
df_events_ce = df_events_ce.assign(
    hover_text=lambda frame: frame.apply(build_hover_text, axis=1)
)
df_events_ce_sorted = df_events_ce.sort_values('year')

In [1443]:
# Ten evenly spaced positions between the earliest start year and the latest
# end year, computed once and reused for the tick values and their labels.
_ce_tick_positions = np.linspace(
    df_events_ce_sorted['year'].min(),
    df_events_ce_sorted['endYear'].max(),
    10,
)

# Horizontal bar timeline: one bar per event, starting at its year and
# extending for `duration_years`, coloured by duration.
fig_ce = go.Figure(
    data=[
        go.Bar(
            x=df_events_ce_sorted['duration_years'],
            y=df_events_ce_sorted['title'],
            base=df_events_ce_sorted['year'],
            orientation='h',
            marker=dict(
                color=df_events_ce_sorted['duration_years'],
                colorscale="Reds",
                showscale=True,
                # The colour-bar title font is set through `title.font`;
                # the flat `titlefont` attribute is deprecated in Plotly.
                colorbar=dict(
                    title=dict(
                        text="Duration (Years)",
                        font=dict(color="#fafafa"),
                    ),
                    tickfont=dict(color="#fafafa"),
                ),
                cmin=0,
                cmax=7.5
            ),
            # The whole label (including <extra></extra>) comes from hover_text.
            hovertemplate='%{customdata}',
            customdata=df_events_ce_sorted['hover_text']
        )
    ],
    layout=go.Layout(
        xaxis_title="Approximate Year",
        yaxis_title="Event",
        xaxis=dict(
            # NOTE(review): Plotly documents tickvals/ticktext as taking effect
            # only with tickmode='array' — confirm whether 'auto' is intended.
            tickmode='auto',
            tickvals=_ce_tick_positions,
            ticktext=[f"{int(x)}" for x in _ce_tick_positions],
            color="#fafafa",
        ),
        yaxis=dict(
            color="#fafafa",
            tickfont=dict(color="#fafafa")
        ),
        hoverlabel=dict(
            bgcolor="#0f1116",
            font=dict(color="white"),
            align='left'
        ),
        height=2500,
        width=1250,
        # Fully transparent backgrounds.
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
)

In [1447]:
# fig_ce.show()

In [1448]:
# fig_ce.write_json("../models/figure_timeline_ce.json")