In [None]:
import pandas as pd
import ast
import re
from collections import Counter

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import networkx as nx

from mysql.connector import Error
import core.utils as oa

import json
import os

from pathlib import Path

# Root directory path: two levels above the directory that contains this file.
# `__file__` is not defined when this code runs as a notebook cell, so fall
# back to the current working directory in that case.
# NOTE(review): the fallback assumes the kernel was started in the notebook's
# own directory — confirm.
try:
    ROOT = Path(__file__).resolve().parents[2]
except NameError:
    ROOT = Path.cwd().resolve().parents[1]

In [34]:
def create_quote_dist_chart(ids: list, type: str) -> go.Figure:
    """Chart where quotes of one kind occur along the length of sermons.

    Each sermon is cut into 100 consecutive slices of (nearly) equal length,
    one per percent of its text. For every slice the function counts how many
    sermons have at least one string entry in ``sermon.word_types`` within
    that slice that contains ``type``. The counts are drawn as 100 bars whose
    grey shade gets darker as the count grows.

    Args:
        ids: Sermon ids; each one is passed to ``oa.Sermon``.
        type: Quote type to look for; must be ``"orgelpredigt"``,
            ``"musikwerk"`` or ``"quelle"``.

    Returns:
        A figure with one bar trace per percent slice. If ``type`` is not one
        of the known keys, an empty figure titled "Type not recognised!".
    """
    type_dict = {
        "orgelpredigt": "Orgelpredigtzitate",
        "musikwerk": "Liedzitate",
        "quelle": "Literaturzitate",
    }
    if type not in type_dict:
        occ_fig = go.Figure()
        occ_fig.update_layout(title_text="Type not recognised!")
        return occ_fig

    n_bins = 100
    chunked_text = [0] * n_bins    # sermons with a hit, per percent slice
    thumbnails = [""] * n_bins     # short titles of those sermons, per slice

    for sermon_id in ids:
        sermon = oa.Sermon(sermon_id)
        # Slice boundaries are derived from the word count, as before.
        # NOTE(review): assumes sermon.word_types is aligned index-by-index
        # with sermon.words — confirm.
        n_words = len(sermon.words)

        for j in range(n_bins):
            # Proportional boundaries cover every word exactly once and never
            # need a zero step, so sermons shorter than 100 words are safe
            # (some of their slices are simply empty).
            start = j * n_words // n_bins
            end = (j + 1) * n_words // n_bins
            slice_types = sermon.word_types[start:end]

            # Non-string entries are ignored; a substring match counts as a hit.
            if any(isinstance(t, str) and type in t for t in slice_types):
                chunked_text[j] += 1
                thumbnails[j] += f"{sermon.kurztitel}<br>"

    occ_fig = go.Figure()

    for i in range(n_bins):
        hovertext = f'{chunked_text[i]} {type_dict[type]} im {i+1}%'
        if thumbnails[i] != "":
            hovertext += f"<br>{thumbnails[i]}"

        # Darken by 15 per hit, starting at 250 and clamping at black.
        shade = max(250 - chunked_text[i] * 15, 0)
        occ_fig.add_trace(go.Bar(
            x=[f"{type_dict[type]} je Predigtprozent"],
            y=[100],
            marker_color=f'rgb({shade},{shade},{shade})',
            hovertext=hovertext,
        ))

    occ_fig.update_layout(width=1500, height=500, showlegend=False)

    return occ_fig

In [None]:
# Load the sermon overview JSON; its top-level keys are used as the sermon ids.
with open(ROOT / "predigten_übersicht.json", mode="r", encoding="utf-8") as file:
    data = json.loads(file.read())
ids = [sermon_id for sermon_id in data]

# Create Overall Song Distribution Chart

In [4]:
# Chart the "musikwerk" (song) quote distribution over every id in `ids`.
create_quote_dist_chart(ids, "musikwerk")

# Create Song Distribution Chart in 25 Year Increments

In [37]:
def group_sermons_in_years(data, interval: int,
                           start_year: int = 1600,
                           end_year: int = 1800) -> list:
    """Group sermon ids into consecutive year intervals.

    The intervals are ``[start, start + interval)`` for every ``start`` in
    ``range(start_year, end_year, interval)``. A sermon dated exactly on an
    interval's first year belongs to that interval.

    Args:
        data: Mapping of sermon id to an info mapping with a ``'year'`` entry.
            The first run of four digits in that entry is taken as the year.
        interval: Width of each interval in years.
        start_year: First year of the first interval.
        end_year: Exclusive upper bound for interval start years.

    Returns:
        A list with one list of sermon ids per interval, in interval order.
        Ids keep the iteration order of ``data``. Entries without a
        four-digit year, or dated outside every interval, are left out.
    """
    year_pattern = re.compile(r'[0-9]{4}')
    interval_starts = range(start_year, end_year, interval)
    chunked_sermons = [[] for _ in interval_starts]

    # Parse each year once and drop the id straight into its interval.
    for sermon_id, info in data.items():
        found = year_pattern.search(str(info['year']))
        if found is None:
            # No recognisable year: skip instead of raising IndexError.
            continue
        position = (int(found.group()) - start_year) // interval
        if 0 <= position < len(chunked_sermons):
            chunked_sermons[position].append(sermon_id)

    return chunked_sermons


In [38]:
# Group the sermon ids once per interval width (25 and 50 years).
sermons_grouped_25, sermons_grouped_50 = (
    group_sermons_in_years(data, width) for width in (25, 50)
)

In [39]:
# One song-quote ("musikwerk") chart per 25-year group of sermons.
figs_25 = [
    create_quote_dist_chart(sermon_group, "musikwerk")
    for sermon_group in sermons_grouped_25
]
    

In [40]:
# Stack the 25-year song charts vertically, one subplot row per interval.
# The titles rebuild each interval's bounds from the 1600 start year and the
# 25-year step that were used to create `sermons_grouped_25`.
fig = make_subplots(
    rows=len(figs_25),
    cols=1,
    subplot_titles=[
        f"Liedverteilung in Predigten zwischen {1600 + (i*25)} und {1600+(i*25)+25} ({len(sermons_grouped_25[i])} Predigten)"
        for i in range(len(figs_25))
    ],
)

# Copy every trace of the i-th chart into subplot row i+1 (rows are 1-based).
for i, fig_item in enumerate(figs_25):
    for trace in fig_item.data:
        fig.add_trace(trace, row=i+1, col=1)

# Overall size and title; the title spelling matches the "Akkumulierte ..."
# titles of the other interval charts in this notebook.
fig.update_layout(
    height=1200,
    width=1000,
    showlegend=False,
    title_text="Akkumulierte Verteilung von Liedzitaten in 25-Jahr Intervallen",
)

# Show the figure
fig.show()

In [9]:
# One song-quote ("musikwerk") chart per 50-year group of sermons.
figs_50 = [
    create_quote_dist_chart(sermon_group, "musikwerk")
    for sermon_group in sermons_grouped_50
]

In [41]:
# Stack the 50-year song charts vertically, one subplot row per interval.
# The titles rebuild each interval's bounds from the 1600 start year and the
# 50-year step that were used to create `sermons_grouped_50`.
fig = make_subplots(
    rows=len(figs_50),
    cols=1,
    subplot_titles=[
        f"Liedverteilung in Predigten zwischen {1600 + (i*50)} und {1600+(i*50)+50} ({len(sermons_grouped_50[i])} Predigten)"
        for i in range(len(figs_50))
    ],
)

# Copy every trace of the i-th chart into subplot row i+1 (rows are 1-based).
for i, fig_item in enumerate(figs_50):
    for trace in fig_item.data:
        fig.add_trace(trace, row=i+1, col=1)

# Overall size and title; the title spelling matches the "Akkumulierte ..."
# titles of the other interval charts in this notebook.
fig.update_layout(
    height=1200,
    width=1000,
    showlegend=False,
    title_text="Akkumulierte Verteilung von Liedzitaten in 50-Jahr Intervallen",
)

# Show the figure
fig.show()

In [64]:
# Scratch check: str.split() on an empty string returns an empty list.
# `x` is not referenced by any other cell visible in this notebook.
x = ""
x.split()

[]

# Orgelpredigtzitate

In [11]:
# Organ-sermon ("orgelpredigt") quote charts, one per group, for both the
# 25-year and the 50-year grouping.
figs_25_orgel = [
    create_quote_dist_chart(sermon_group, "orgelpredigt")
    for sermon_group in sermons_grouped_25
]
figs_50_orgel = [
    create_quote_dist_chart(sermon_group, "orgelpredigt")
    for sermon_group in sermons_grouped_50
]

In [12]:
# Stack the 25-year organ-sermon charts vertically, one subplot row per
# interval. Row count and titles are both derived from `figs_25_orgel`, so
# this cell does not depend on the song-chart list `figs_25`.
fig = make_subplots(
    rows=len(figs_25_orgel),
    cols=1,
    subplot_titles=[
        f"Verteilung von Orgelpredigtzitaten in Predigten zwischen {1600 + (i*25)} und {1600+(i*25)+25} ({len(sermons_grouped_25[i])} Predigten)"
        for i in range(len(figs_25_orgel))
    ],
)

# Copy every trace of the i-th chart into subplot row i+1 (rows are 1-based).
for i, fig_item in enumerate(figs_25_orgel):
    for trace in fig_item.data:
        fig.add_trace(trace, row=i+1, col=1)

# Overall size and title
fig.update_layout(
    height=1200,
    width=1000,
    showlegend=False,
    title_text="Akkumulierte Verteilung von Orgelpredigtzitaten in 25-Jahr Intervallen",
)

# Show the figure
fig.show()

In [42]:
# Stack the 50-year organ-sermon charts vertically, one subplot row per
# interval. Row count and titles are both derived from `figs_50_orgel`, so
# this cell does not depend on the song-chart list `figs_50`.
fig = make_subplots(
    rows=len(figs_50_orgel),
    cols=1,
    subplot_titles=[
        f"Verteilung von Orgelpredigtzitaten in Predigten zwischen {1600 + (i*50)} und {1600+(i*50)+50} ({len(sermons_grouped_50[i])} Predigten)"
        for i in range(len(figs_50_orgel))
    ],
)

# Copy every trace of the i-th chart into subplot row i+1 (rows are 1-based).
for i, fig_item in enumerate(figs_50_orgel):
    for trace in fig_item.data:
        fig.add_trace(trace, row=i+1, col=1)

# Overall size and title
fig.update_layout(
    height=1200,
    width=1000,
    showlegend=False,
    title_text="Akkumulierte Verteilung von Orgelpredigtzitaten in 50-Jahr Intervallen",
)

# Show the figure
fig.show()