In [1]:
import pandas as pd
import db_connection
import ast
import re

import plotly.graph_objects as go
#import matplotlib.pyplot as plt
import networkx as nx

from mysql.connector import Error
from orgelpredigt_analysis import Sermon
from orgelpredigt_analysis import Person

import json
import os

In [2]:
def is_id(value):
    """Return True if ``value`` is exactly one database ID.

    An ID is the letter ``E``, followed by ``0`` or ``1``, followed by five
    digits (e.g. ``E000001``). The whole string has to match: values with
    trailing text such as ``"E030276?"`` or ``"E030069 ; E030304"`` are not
    usable as lookup keys and are therefore rejected. Non-string input is
    rejected as well instead of raising.
    """
    if not isinstance(value, str):
        return False
    # fullmatch anchors both ends; re.match would accept any string that
    # merely starts with an ID.
    return re.fullmatch(r'E[01][0-9]{5}', value) is not None

In [3]:
# Load the sermon overview: a mapping of sermon ID -> metadata dict
with open("predigten_übersicht.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only the entries that have a 'year' key
cleaned = {k: v for k, v in data.items() if 'year' in v}

year_finder = re.compile(r'[0-9]{4}')

# Normalise every 'year' to its first four-digit run, or to '[s.a.]' when
# there is none. Iterating `cleaned` (not `data`) avoids a KeyError on entries
# without a 'year'; search() avoids the IndexError that findall(...)[0] raises
# on an empty result.
for k, v in cleaned.items():
    found = year_finder.search(str(v['year']))
    v['year'] = found.group(0) if found else '[s.a.]'

# Undated sermons cannot be placed on the year axis, so they are left out of
# the sorted list (and reported) instead of crashing int().
undated = [key for key, value in cleaned.items() if value['year'] == '[s.a.]']
if undated:
    print(f"Skipping {len(undated)} sermon(s) without a usable year: {undated}")

# Convert to nested list [id, title, year] and sort by year
relevant_sermons = sorted(
    [
        [key, value['title'], int(value['year'])]
        for key, value in cleaned.items()
        if value['year'] != '[s.a.]'
    ],
    key=lambda x: x[2]
)

ids = [i[0] for i in relevant_sermons]

In [4]:
# Accumulator for per-sermon records; it is filled by the loop over `ids` in the next cell.
sermons = []

In [5]:
# Build one {"id": ..., "links": [...]} record per sermon.
# The list is reset here so that re-running this cell does not append duplicates.
sermons = []
# NOTE: the loop variable is still called `id` (shadowing the builtin) in case
# later cells read its leaked value; rename it only together with those cells.
for id in ids:
    current_sermon = Sermon(id)
    sermons.append({
        "id": current_sermon.id,
        # keep only those references that are database IDs
        "links": [ref for ref in current_sermon.all_references if is_id(ref)],
    })

Query executed for E030276?, but no data found.
Query executed for E030069 ; E030304, but no data found.
Query executed for E011607 ;  E011608, but no data found.
Query executed for E030057 (Ortsteil Neustadt), but no data found.


In [6]:
# Empty directed graph; the following cells add the sermon IDs as nodes and the links between them as edges.
G = nx.DiGraph()

In [7]:
# Every sermon becomes a node.
nodes = [record['id'] for record in sermons]

# Only links whose target starts like a sermon ID (E00 + four digits) become
# edges, directed from the citing sermon to the cited one.
connections = [
    (record['id'], target)
    for record in sermons
    for target in record['links']
    if re.match(r'E00[0-9]{4}', target)
]

G.add_nodes_from(nodes)
G.add_edges_from(connections)

In [8]:
# Number of incoming edges per node, as a plain dict keyed by node
in_degrees = {node_id: count for node_id, count in G.in_degree()}
print(in_degrees)

{'E000001': 0, 'E000002': 1, 'E000029': 0, 'E000030': 0, 'E000099': 1, 'E000003': 10, 'E000098': 0, 'E000096': 0, 'E000095': 0, 'E000092': 1, 'E000091': 3, 'E000090': 0, 'E000089': 0, 'E000086': 0, 'E000085': 1, 'E000083': 1, 'E000082': 0, 'E000079': 3, 'E000078': 1, 'E000075': 0, 'E000106': 0, 'E000073': 1, 'E000072': 1, 'E000070': 0, 'E000108': 0, 'E000069': 0, 'E000068': 1, 'E000067': 1, 'E000065': 0, 'E000063': 0, 'E000060': 0, 'E000058': 1, 'E000059': 0, 'E000056': 0, 'E000057': 0, 'E000055': 0, 'E000104': 0, 'E000053': 0, 'E000051': 1, 'E000052': 0, 'E000074': 0, 'E000048': 0, 'E000046': 0, 'E000045': 0, 'E000061': 0, 'E000042': 0, 'E000109': 0, 'E000041': 0, 'E000039': 0, 'E000038': 0, 'E000036': 0, 'E000037': 0, 'E000035': 0, 'E000034': 0, 'E000027': 1, 'E000024': 0, 'E000023': 0, 'E000021': 0, 'E000020': 0, 'E000016': 0, 'E000014': 0, 'E000015': 0, 'E000009': 0, 'E000008': 0, 'E000007': 0, 'E000031': 3, 'E000005': 3, 'E000088': 1, 'E000100': 1, 'E000094': 1}


In [9]:
# Force-directed layout. A fixed seed makes the node positions reproducible
# across kernel restarts (spring_layout starts from random positions otherwise).
# Alternatives tried: nx.circular_layout(G), nx.spectral_layout(G),
# nx.fruchterman_reingold_layout(G, k=0.5)
pos = nx.spring_layout(G, k=0.5, iterations=100, seed=42)

In [10]:
# Total degree (incoming + outgoing edges) per node
degrees = {node_id: count for node_id, count in G.degree()}

In [11]:
# Store each node's layout coordinates in its attribute dict under 'pos'
for node, attrs in G.nodes(data=True):
    attrs['pos'] = pos[node]

In [12]:
# Sanity check: every node must carry a non-None 'pos' attribute before plotting
for node, attrs in G.nodes(data=True):
    assert 'pos' in attrs, f"Node {node} missing 'pos'"
    assert attrs['pos'] is not None, f"Node {node} has None position"

In [13]:
# Map list position -> sermon ID and relabel the graph with it.
# NOTE(review): the nodes were added under their ID strings, so the integer
# keys presumably match no node and this call only returns a copy of G — confirm.
mapping = dict(enumerate(ids))
G = nx.relabel_nodes(G, mapping)

In [14]:
# Edge coordinates: each edge contributes (start, end, None); the None entry
# breaks the line so that consecutive edges are not joined.
edge_x, edge_y = [], []
edge_shapes = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = G.nodes[source]['pos'], G.nodes[target]['pos']
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.5, 'color': '#888'},
)

# Node coordinates and initial marker sizes (10 px per incident edge)
node_x = [G.nodes[name]['pos'][0] for name in G.nodes()]
node_y = [G.nodes[name]['pos'][1] for name in G.nodes()]
node_sizes = [degrees[name] * 10 for name in G.nodes()]

# Marker styling; `color` starts empty and is filled in by a later cell.
# Other colorscale options:
#   'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
#   'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
#   'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
node_marker = {
    'showscale': True,
    'size': node_sizes,
    'colorscale': 'Magma',
    'reversescale': False,
    'color': [],
    'colorbar': {
        'thickness': 15,
        'title': {'text': 'Node Connections', 'side': 'right'},
        'xanchor': 'left',
    },
    'line_width': 2,
}

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    text=list(G.nodes()),
    marker=node_marker,
)

In [15]:
# Number of outgoing neighbours per node (kept for reference)
node_adjacencies = [len(neighbors) for _, neighbors in G.adjacency()]

# Hover text and in-degree per node. Iterate over G.nodes() — the same order
# that produced the trace's x/y coordinates — rather than over `ids`: the graph
# can contain nodes that were only added implicitly as edge targets, and those
# would otherwise end up without text, colour and size.
node_text = []
in_connections = []
for node in G.nodes():
    node_text.append(f"{node} ({in_degrees[node]} Verweise)")
    in_connections.append(in_degrees[node])

# Colour and size both encode the number of incoming references
node_trace.marker.color = in_connections
node_trace.marker.size = [(x + 4) * 3 for x in in_connections]
node_trace.text = node_text

In [16]:
# Both axes are purely positional, so grid, zero line and tick labels are hidden
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

layout = go.Layout(
    title={
        'text': "<br>Quotations in between sermons",
        'font': {'size': 16},
    },
    #shapes=edge_shapes,
    showlegend=False,
    hovermode='closest',
    margin={'b': 40, 'l': 10, 'r': 10, 't': 80},
    annotations=[
        {
            'text': "",
            'showarrow': True,
            'xref': "paper",
            'yref': "paper",
            'x': 0.00,
            'y': -0.00,
        }
    ],
    xaxis=hidden_axis,
    yaxis=hidden_axis,
)

# Edges are drawn first so that the node markers sit on top of them
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

fig.show()

In [17]:
# Start over with a fresh directed graph; the next cell adds every link of every sermon as an edge.
G = nx.DiGraph()

In [18]:
# Every sermon becomes a node.
nodes = [record['id'] for record in sermons]

# Every collected link becomes an edge from the sermon to the linked ID.
connections = [
    (record['id'], target)
    for record in sermons
    for target in record['links']
]

G.add_nodes_from(nodes)
G.add_edges_from(connections)

In [19]:
# Number of incoming edges per node, as a plain dict keyed by node
in_degrees = {node_id: count for node_id, count in G.in_degree()}
print(in_degrees)

{'E000001': 0, 'E000002': 1, 'E000029': 0, 'E000030': 0, 'E000099': 1, 'E000003': 10, 'E000098': 0, 'E000096': 0, 'E000095': 0, 'E000092': 1, 'E000091': 3, 'E000090': 0, 'E000089': 0, 'E000086': 0, 'E000085': 1, 'E000083': 1, 'E000082': 0, 'E000079': 3, 'E000078': 1, 'E000075': 0, 'E000106': 0, 'E000073': 1, 'E000072': 1, 'E000070': 0, 'E000108': 0, 'E000069': 0, 'E000068': 1, 'E000067': 1, 'E000065': 0, 'E000063': 0, 'E000060': 0, 'E000058': 1, 'E000059': 0, 'E000056': 0, 'E000057': 0, 'E000055': 0, 'E000104': 0, 'E000053': 0, 'E000051': 1, 'E000052': 0, 'E000074': 0, 'E000048': 0, 'E000046': 0, 'E000045': 0, 'E000061': 0, 'E000042': 0, 'E000109': 0, 'E000041': 0, 'E000039': 0, 'E000038': 0, 'E000036': 0, 'E000037': 0, 'E000035': 0, 'E000034': 0, 'E000027': 1, 'E000024': 0, 'E000023': 0, 'E000021': 0, 'E000020': 0, 'E000016': 0, 'E000014': 0, 'E000015': 0, 'E000009': 0, 'E000008': 0, 'E000007': 0, 'E080156': 1, 'E080161': 4, 'E080176': 6, 'E080177': 5, 'E080155': 1, 'E080181': 1, 'E08

In [20]:
# Force-directed layout; the fixed seed makes the node positions reproducible
# across kernel restarts (spring_layout starts from random positions otherwise).
pos = nx.spring_layout(G, k=2, iterations=100, seed=42)
# Total degree (incoming + outgoing edges) per node
degrees = dict(G.degree())

In [21]:
# Store each node's layout coordinates in its attribute dict under 'pos'
for node, attrs in G.nodes(data=True):
    attrs['pos'] = pos[node]

In [22]:
# Sanity check: every node must carry a non-None 'pos' attribute before plotting
for node, attrs in G.nodes(data=True):
    assert 'pos' in attrs, f"Node {node} missing 'pos'"
    assert attrs['pos'] is not None, f"Node {node} has None position"

In [23]:
# Map list position -> sermon ID and relabel the graph with it.
# NOTE(review): the nodes were added under their ID strings, so the integer
# keys presumably match no node and this call only returns a copy of G — confirm.
mapping = dict(enumerate(ids))
G = nx.relabel_nodes(G, mapping)

In [24]:
# Open the database connection used by the lookup helper defined below.
# get_connection() is unpacked into a (cursor, connection) pair.
# NOTE(review): db_connection is already imported in the first cell; this repeated import is redundant.
import db_connection
cursor, connection = db_connection.get_connection()

In [25]:
# Per ID prefix: (table, ID column, columns to fetch). Only these fixed
# identifiers are interpolated into the SQL text; the ID value itself is
# always passed as a bound parameter.
_SHORT_INFO_SOURCES = {
    "E08": ("e08_quellen", "e08id", ("e08autor1", "e08titel1", "e08ort", "e08jahr")),
    "E09": ("e09_literatur", "e09id", ("e09autor1", "e09titel1", "e09ort", "e09jahr")),
    "E10": ("e10_musikwerke", "e10id", ("e10komponist", "e10werk")),
    "E00": ("e00_orgelpredigten", "e00id", ("e00autor", "e00kurztitel")),
}


def _format_short_info(prefix, record):
    """Turn one fetched row (column name -> value) into a display label."""
    if prefix in ("E08", "E09"):
        column_prefix = prefix.lower()
        # `or` (rather than a .get default) so that NULL columns also fall
        # back to the placeholder instead of being rendered as "None"
        author = record.get(f"{column_prefix}autor1") or "no author"
        title = record.get(f"{column_prefix}titel1") or "no title"
        place = record.get(f"{column_prefix}ort") or "s.l."
        year = record.get(f"{column_prefix}jahr") or "s.a."
        return f"{author}: {title} ({place}, {year})"
    if prefix == "E10":
        composer = record.get("e10komponist") or "no composer"
        title = record.get("e10werk") or "no title"
        return f"{composer}: {title}"
    # E00: the stored author value is handed to Person() to obtain a name
    author = Person(record["e00autor"]).name
    title = record["e00kurztitel"]
    return f"{author}: {title}"


def get_short_info(id):
    """Return a short human-readable label for a database ID.

    Args:
        id: The ID to describe. Its first three characters select the table
            that is queried (E08 sources, E09 literature, E10 musical works,
            E00 sermons).

    Returns:
        A string such as "author: title (place, year)" for E08/E09 or
        "name: title" for E10/E00; "no data for this source" when the query
        returns no row; the ID itself when it is not a valid ID, has no
        table assigned, or the lookup fails.

    Database and other errors during the lookup are printed, not raised.
    """
    if not is_id(id):
        return f"{id}"

    prefix = id[:3]
    source = _SHORT_INFO_SOURCES.get(prefix)
    if source is None:
        return f"{id}"

    table, id_column, columns = source
    query = f"SELECT {', '.join(columns)} FROM {table} WHERE {id_column} = %s"

    cursor = connection.cursor()
    try:
        # Bound parameter instead of string interpolation of the ID
        cursor.execute(query, (id,))
        column_names = [col[0] for col in cursor.description]
        # fetchall() consumes the whole result set so the cursor can be closed cleanly
        rows = cursor.fetchall()
        if not rows:
            return "no data for this source"
        return _format_short_info(prefix, dict(zip(column_names, rows[0])))
    except Error as e:
        print(f"Database error occurred for {id}:", e)
    except Exception as e:
        print(f"Unexpected error for {id}:", e)
    finally:
        cursor.close()
    # Fall back to the bare ID so that callers never embed "None" in a label
    return f"{id}"
        


In [26]:
# Marker colour per category name
color_map = {
    "orgelpredigt": "rgb(135, 44, 162)",
    "musikwerk": "rgb(192, 54, 157)",
    "literatur": "rgb(234, 79, 136)",
    "quelle": "rgb(250, 120, 118)",
    "bibel": "rgb(246, 169, 122)",
    "nan": "rgb(237, 217, 163)",
    "text": "rgb(237, 217, 163)",
}
# The same colours again, keyed by ID prefix (insertion order matters for the legend)
color_map.update({
    prefix: color_map[category]
    for prefix, category in (
        ("E00", "orgelpredigt"),
        ("E10", "musikwerk"),
        ("E09", "literatur"),
        ("E08", "quelle"),
    )
})

In [27]:
# Edge coordinates: each edge contributes (start, end, None); the None entry
# breaks the line so that consecutive edges are not joined.
edge_x, edge_y = [], []
edge_shapes = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = G.nodes[source]['pos'], G.nodes[target]['pos']
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.5, 'color': '#888'},
)

# Node coordinates, initial marker sizes (10 px per incident edge) and a
# colour chosen by the first three characters of the node ID
node_x = [G.nodes[name]['pos'][0] for name in G.nodes()]
node_y = [G.nodes[name]['pos'][1] for name in G.nodes()]
node_sizes = [degrees[name] * 10 for name in G.nodes()]
node_colors = [color_map.get(name[:3], 'gray') for name in G.nodes()]

node_marker = {
    'showscale': False,
    'size': node_sizes,
    'colorscale': 'Magma',
    'reversescale': False,
    'color': node_colors,
    'line_width': 2,
}

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    text=list(G.nodes()),
    marker=node_marker,
)

In [28]:
# In-degree of every node, in the graph's node order
in_degrees_list = [in_degrees[name] for name in G.nodes()]

In [29]:
# Number of outgoing neighbours per node (kept for reference)
node_adjacencies = [len(neighbors) for _, neighbors in G.adjacency()]

# Hover text and in-degree per node, in the graph's node order
node_text = []
in_connections = []
for node in G.nodes:
    node_text.append(f"{get_short_info(node)} ({in_degrees[node]} Verweise)")
    # Use the current node here: the previous `in_degrees[id]` read a loop
    # variable left over from an earlier cell and repeated one value for all nodes.
    in_connections.append(in_degrees[node])

# Marker size encodes the number of incoming references
node_trace.marker.size = [(x + 3) * 2.5 for x in in_degrees_list]
node_trace.text = node_text

Query executed for E030276?, but no data found.
Query executed for E030057 (Ortsteil Neustadt), but no data found.


In [30]:
def create_legend(color_map):
    """Build dummy scatter traces that serve as legend entries.

    Args:
        color_map: Mapping of group name -> colour. Only keys starting with
            "E" (the ID prefixes) produce a legend entry.

    Returns:
        A list of ``go.Scatter`` traces, one per qualifying key in the order
        of ``color_map``. Each trace holds a single invisible point, so it
        shows up in the legend but not in the plot.
    """
    legend_translation = {
        "E00": "Predigt",
        "E10": "Musikwerk",
        "E08": "Quelle",
        "E09": "Literatur"
    }

    return [
        go.Scatter(
            x=[None], y=[None],  # invisible point
            mode='markers',
            marker=dict(size=10, color=color),
            legendgroup=group_name,
            showlegend=True,
            # fall back to the raw key so an untranslated prefix does not raise KeyError
            name=legend_translation.get(group_name, group_name)
        )
        for group_name, color in color_map.items()
        if group_name.startswith("E")
    ]

In [31]:
# Both axes are purely positional, so grid, zero line and tick labels are hidden
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

layout = go.Layout(
    title={
        'text': "<br>Quotations in between sermons",
        'font': {'size': 16},
    },
    #shapes=edge_shapes,
    showlegend=True,
    hovermode='closest',
    margin={'b': 40, 'l': 10, 'r': 10, 't': 80},
    annotations=[
        {
            'text': "",
            'showarrow': True,
            'xref': "paper",
            'yref': "paper",
            'x': 0.00,
            'y': -0.00,
        }
    ],
    xaxis=hidden_axis,
    yaxis=hidden_axis,
)

# Edges are drawn first so that the node markers sit on top of them
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

# One invisible dummy trace per category supplies the legend entries
legend_traces = create_legend(color_map)
for legend_entry in legend_traces:
    fig.add_trace(legend_entry)

# Square canvas with a 1:1 axis ratio; the legend sits to the right of the plot
legend_style = {
    'title': 'Kategorien',
    'x': 1.05,  # position legend to the right
    'y': 1,
    'bgcolor': 'rgba(255,255,255,0.7)',
    'bordercolor': 'black',
    'borderwidth': 1,
}
fig.update_layout(
    xaxis={'scaleanchor': 'y', 'scaleratio': 1},
    yaxis={'scaleanchor': 'x', 'scaleratio': 1},
    width=1200,
    height=1200,
    legend=legend_style,
)

fig.show()