# Statistical tools for the Ugaritic Texts Database

This jupyter notebook contains functions which extracts data from the [database of texts from Ugarit](https://github.com/valekfrantisek/UgariticReligion/blob/main/UGARIT_TEXTS.csv). I am sorry that it is not written more clearly and with more comments, but it has been created initially for personal use only. Hopefully, the names of the objects and functions are self-explanotory enough.

In [1]:
import os
import pandas as pd

from collections import defaultdict

In [2]:
""" Defining paths """

ROOT_PATH = os.getcwd()
DATABASE = os.path.join(ROOT_PATH, 'UGARIT_TEXTS.csv')

In [3]:
def list_locations_et_languages():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_et_languages = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        languages = df.iloc[row_id]['Language']

        if locations_et_languages[general_area]:
            locations_et_languages[general_area][languages] += 1
        else:
            locations_et_languages[general_area] = defaultdict(int)
            locations_et_languages[general_area][languages] += 1

    row_mockup = {'Akkadian': 0, 'Ugaritic': 0, 'Hurrian': 0, 'Egyptian': 0, 'Cypro-Minoan?': 0, 'Hittite': 0, 'Sumerian': 0, 'multilingual': 0, 'unclassified': 0, 'total': 0}

    locations_to_count_in = ['Royal Palace', 'House of Urtenu', 'House of Rapanu', 'House of Yabninu', 'Lamaštu', 'House of the Literary Tablets', 'Between Royal Palace and South Palace', 'House of the High Priest', 'House of Rašapabu', 'Literate’s House', 'House of the Hurrian Priest']
    
    out_dict = {'Other/unknown': row_mockup.copy()}
    
    for location in locations_et_languages:
        print(location)
        total = 0
        row_dict = row_mockup.copy()
        for lang in locations_et_languages[location]:
            try:
                num_of_languages = len(lang.split(';'))
            except:
                num_of_languages = 1

            if lang == 'Phoenician' or lang == 'Latin':
                continue
            elif num_of_languages >= 2:
                print('\t', lang, type(lang), locations_et_languages[location][lang])
                total += locations_et_languages[location][lang]
                if location in locations_to_count_in:
                    row_dict['multilingual'] = locations_et_languages[location][lang]
                elif location == 'Ras Ibn-Hani' or location == 'Minet el-Beida' or location == 'outside':
                    continue
                else:
                    out_dict['Other/unknown']['multilingual'] += locations_et_languages[location][lang]
            else:
                print('\t', lang, type(lang), locations_et_languages[location][lang])
                total += locations_et_languages[location][lang]
                if location in locations_to_count_in:
                    if type(lang) == float:
                        row_dict['unclassified'] = locations_et_languages[location][lang]
                    else:
                        row_dict[lang] = locations_et_languages[location][lang]
                elif location == 'Ras Ibn-Hani' or location == 'Minet el-Beida' or location == 'outside':
                    continue
                else:
                    if type(lang) == float:
                        out_dict['Other/unknown']['unclassified'] += locations_et_languages[location][lang]
                    else:
                        out_dict['Other/unknown'][lang] += locations_et_languages[location][lang]
        
        if location in locations_to_count_in:
            row_dict['total'] = total
            out_dict[location] = row_dict

        print('\t\tTotal:', total)

    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'locations_et_languages.csv'), sep=',', encoding='utf-8')

In [18]:
list_locations_et_languages()

nan
	 nan <class 'float'> 82
	 Ugaritic <class 'str'> 228
	 Akkadian; Ugaritic <class 'str'> 5
	 Hurrian <class 'str'> 2
	 Egyptian <class 'str'> 4
	 Akkadian <class 'str'> 72
	 Cypro-Minoan? <class 'str'> 2
	 Hittite <class 'str'> 1
		Total: 396
Royal Palace
	 Akkadian; Hurrian <class 'str'> 1
	 Egyptian <class 'str'> 16
	 nan <class 'float'> 36
	 Ugaritic <class 'str'> 981
	 Hurrian <class 'str'> 67
	 Akkadian; Hittite <class 'str'> 1
	 Akkadian; Ugaritic <class 'str'> 22
	 Akkadian <class 'str'> 553
	 Hittite <class 'str'> 2
	 Akkadian; Sumerian <class 'str'> 1
		Total: 1680
Literate’s House
	 Sumerian <class 'str'> 1
	 Akkadian <class 'str'> 13
	 Ugaritic <class 'str'> 12
		Total: 26
House of the Literary Tablets
	 Akkadian; Sumerian <class 'str'> 2
	 Akkadian <class 'str'> 79
	 Akkadian; Hittite <class 'str'> 1
	 Akkadian; Ugaritic <class 'str'> 1
	 Ugaritic <class 'str'> 13
	 nan <class 'float'> 2
	 Egyptian <class 'str'> 1
		Total: 99
Northwest of the tell
	 Ugaritic <class 'str

In [8]:
def list_languages():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    languages_counts = defaultdict(int)

    for row_id in df.index:
        languages = df.iloc[row_id]['Language']
        if languages == 'Latin' or languages == 'Phoenician':
            continue
        else:
            try:
                lang_count = len(languages.split(';'))
            except:
                lang_count = 1
            
            if type(languages) == float:
                languages_counts['unclassified'] += 1
            elif lang_count >= 2:
                languages_counts['multilingual'] += 1
            else:
                languages_counts[languages] += 1
    
    out_dict = {}
    for i, lang in enumerate(languages_counts):
        print(lang, languages_counts[lang])
        out_dict[i] = {'language': lang, 'count of texts': languages_counts[lang]}

    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'languages.csv'), sep=',', encoding='utf-8')

In [9]:
list_languages()

unclassified 226
multilingual 108
Sumerian 10
Ugaritic 1936
Akkadian 1925
Hittite 12
Hurrian 95
Egyptian 100
Cypro-Minoan? 12


In [10]:
def multilingual_data():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    languages_counts = defaultdict(int)

    for row_id in df.index:
        languages = df.iloc[row_id]['Language']
        try:
            lang_count = len(languages.split(';'))
            if lang_count >= 2:
                languages_counts[languages] += 1
        except:
            continue
    
    out_dict = {}
    for i, lang in enumerate(languages_counts):
        print(lang, languages_counts[lang])
        out_dict[i] = {'languages': lang, 'count of texts': languages_counts[lang]}

    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'multilingual.csv'), sep=',', encoding='utf-8')

In [12]:
multilingual_data()

Akkadian; Hurrian 2
Akkadian; Sumerian 31
Akkadian; Hittite; Sumerian 1
Akkadian; Ugaritic 55
Akkadian; Hittite 2
Akkadian; Hurrian; Sumerian; Ugaritic 8
Akkadian; Hurrian; Sumerian 3
Hurrian; Ugaritic 5
Ugaritic; Hittite 1


In [13]:
ktu_classification = {
    1: 'Literary and Religious',
    2: 'Letters',
    3: 'Legal and Juridical',
    4: 'Economic',
    5: 'Scribal Excercises',
    6: 'Inscriptions',
}

# ktu_classification = {
#     1: 'Literary and Religious',
#     2: 'Letters',
#     3: 'Legal and Juridical',
#     4: 'Economic',
#     5: 'Scribal Excercises',
#     6: 'Inscriptions',
#     7: 'Unclassified',
#     8: 'Illegible and uninscribed',
#     9: 'Unpublished',
#     10: 'Ugaritic in syllabic'
# }

def list_locations_et_ktu_genres():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_et_ktu = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        ktu_num = str(df.iloc[row_id]['KTU3'])
        script = str(df.iloc[row_id]['Script'])
        ktu_category = ktu_num.split('.')[0]

        if 'Alphabetic' in script:
            if locations_et_ktu[general_area]:
                locations_et_ktu[general_area][ktu_category] += 1
            else:
                locations_et_ktu[general_area] = defaultdict(int)
                locations_et_ktu[general_area][ktu_category] += 1
    
    row_mockup = {'Literary and Religious': 0, 'Letters': 0, 'Legal and Juridical': 0, 'Economic': 0, 'Scribal Excercises': 0, 'Inscriptions': 0, 'Unclassified etc': 0, 'total': 0}
    locations_to_count_in = ['Royal Palace', 'House of Urtenu', 'House of Rapanu', 'House of Yabninu', 'Lamaštu', 'House of the Literary Tablets', 'Between Royal Palace and South Palace', 'House of the High Priest', 'House of Rašapabu', 'Literate’s House', 'House of the Hurrian Priest']

    out_dict = {'Other/unknown': row_mockup.copy()}

    for location in locations_et_ktu:
        print(location)
        if location in locations_to_count_in:
            total = 0
            row_data = row_mockup.copy()
            for ktu in locations_et_ktu[location]:
                print('\t', ktu, locations_et_ktu[location][ktu])
                try:                
                    row_data[ktu_classification[int(ktu)]] = locations_et_ktu[location][ktu]
                    total += locations_et_ktu[location][ktu]
                except:
                    row_data['Unclassified etc'] += locations_et_ktu[location][ktu]
                    total += locations_et_ktu[location][ktu]
            row_data['total'] = total
            print('\t\tTotal:', total)
            out_dict[location] = row_data
        else:
            total = 0
            row_data = out_dict['Other/unknown']
            for ktu in locations_et_ktu[location]:
                print('\t', ktu, locations_et_ktu[location][ktu])
                try:                
                    row_data[ktu_classification[int(ktu)]] += locations_et_ktu[location][ktu]
                    total += locations_et_ktu[location][ktu]
                except:
                    row_data['Unclassified etc'] += locations_et_ktu[location][ktu]
                    total += locations_et_ktu[location][ktu]
            row_data['total'] = 0
            print('\t\tTotal:', total)
            out_dict['Other/unknown'] = row_data

    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'ktu_in_locations.csv'), sep=',', encoding='utf-8')

In [17]:
list_locations_et_ktu_genres()

Royal Palace
	 nan 271
	 4 528
	 3 22
	 7 49
	 2 46
	 5 10
	 1 16
	 6 62
		Total: 1004
nan
	 nan 94
	 1 9
	 7 34
	 4 79
	 2 10
	 3 3
	 5 1
	 6 5
		Total: 235
Northwest of the tell
	 nan 1
	 2 1
	 4 20
	 5 1
		Total: 23
Debris
	 nan 1
	 2 1
	 3 1
	 4 1
		Total: 4
Literate’s House
	 nan 10
	 2 1
	 4 1
		Total: 12
House of the High Priest
	 1 54
	 nan 6
	 6 5
	 7 20
	 4 21
	 2 11
	 5 1
		Total: 118
Acropolis
	 1 13
	 7 9
	 nan 6
	 4 6
	 5 3
	 6 5
		Total: 42
House of Yabninu
	 nan 15
	 2 3
	 4 16
	 5 1
		Total: 35
House of the Hurrian Priest
	 1 58
	 5 2
	 4 12
	 7 70
	 nan 2
	 8 8
		Total: 152
House of the Literary Tablets
	 1 4
	 nan 1
	 4 4
	 5 1
	 7 4
		Total: 14
House of Urtenu
	 1 3
	 2 26
	 3 2
	 4 78
	 5 4
	 6 1
	 7 5
		Total: 119
House of Rapanu
	 nan 6
	 2 1
	 4 13
	 5 3
		Total: 23
Main Street
	 nan 1
		Total: 1
Residential District
	 nan 1
	 2 2
	 4 7
	 1 1
	 6 4
	 5 1
	 7 1
		Total: 17
Walls restoration
	 nan 1
	 3 1
	 4 4
		Total: 6
Between Royal Palace and South Palace
	 na

In [34]:
def get_religious_texts_stats():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_et_reli = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        
        is_in_clemens = df.iloc[row_id]['Clemens 2001']
        if 'PRAVDA' == is_in_clemens:
            is_in_clemens = True
        else:
            is_in_clemens = False

        possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
        if type(possible_relevant_genre) == float:
            possible_relevant_genre = False

        ktu_num = str(df.iloc[row_id]['KTU3'])
        ktu_category = ktu_num.split('.')[0]

        if possible_relevant_genre or ktu_category == '1':
            if locations_et_reli[general_area]:
                locations_et_reli[general_area]['religious'] += 1
            else:
                locations_et_reli[general_area] = defaultdict(int)
                locations_et_reli[general_area]['religious'] += 1

        
        elif is_in_clemens:
            if locations_et_reli[general_area]:
                locations_et_reli[general_area]['related'] += 1
            else:
                locations_et_reli[general_area] = defaultdict(int)
                locations_et_reli[general_area]['related'] += 1

        else:
            if locations_et_reli[general_area]:
                locations_et_reli[general_area]['other'] += 1
            else:
                locations_et_reli[general_area] = defaultdict(int)
                locations_et_reli[general_area]['other'] += 1
    
    out_dict = {'Other/unknown': {'religious': 0, 'related':0, 'other': 0, 'total': 0}}
    locations_to_count_in = ['Royal Palace', 'House of Urtenu', 'House of Rapanu', 'House of Yabninu', 'Lamaštu', 'House of the Literary Tablets', 'Between Royal Palace and South Palace', 'House of the High Priest', 'House of Rašapabu', 'Literate’s House', 'House of the Hurrian Priest']
    
    for location in locations_et_reli:
        print(location)
        if location in locations_to_count_in:
            row_data = {'religious': 0, 'other': 0, 'total': 0}
            for religious in locations_et_reli[location]:
                print('\t', religious, locations_et_reli[location][religious])
                row_data[religious] = locations_et_reli[location][religious]
                row_data['total'] += locations_et_reli[location][religious]

            out_dict[location] = row_data
        else:
            row_data = out_dict['Other/unknown']
            for religious in locations_et_reli[location]:
                print('\t', religious, locations_et_reli[location][religious])
                row_data[religious] += locations_et_reli[location][religious]

            out_dict['Other/unknown'] = row_data


    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'religious_stats.csv'), sep=',', encoding='utf-8')
    

In [35]:
get_religious_texts_stats()

nan
	 other 374
	 religious 12
	 related 10
Royal Palace
	 religious 142
	 other 1382
	 related 156
Literate’s House
	 religious 3
	 other 22
	 related 1
House of the Literary Tablets
	 religious 14
	 other 78
	 related 7
Northwest of the tell
	 other 42
	 religious 2
	 related 4
Debris
	 other 14
	 religious 1
	 related 1
Lamaštu
	 religious 25
	 other 61
	 related 19
House of Urtenu
	 religious 28
	 related 42
	 other 613
Lower City
	 religious 1
	 other 24
	 related 4
Temple of Baal
	 religious 1
	 other 6
	 related 1
South Acropolis
	 religious 9
	 other 35
	 related 8
City Centre
	 religious 6
	 other 6
	 related 1
House of the High Priest
	 religious 65
	 other 58
	 related 28
House of Rapanu
	 religious 5
	 other 311
	 related 19
South City
	 religious 3
	 other 53
	 related 1
Acropolis
	 religious 15
	 other 37
	 related 12
House of Yabninu
	 other 133
	 related 3
House of Rašapabu
	 religious 2
	 other 22
	 related 2
House of the Hurrian Priest
	 religious 60
	 other 87
	 rela

In [25]:
def get_religious_texts_detailed_stats():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_et_religenre = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        
        is_in_clemens = df.iloc[row_id]['Clemens 2001']
        if 'PRAVDA' == is_in_clemens:
            is_in_clemens = True
        else:
            is_in_clemens = False

        possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
        if type(possible_relevant_genre) == float:
            possible_relevant_genre = False

        ktu_num = str(df.iloc[row_id]['KTU3'])
        ktu_category = ktu_num.split('.')[0]

        if possible_relevant_genre:
            if 'ritual' in possible_relevant_genre or  'sacrifices' in possible_relevant_genre or 'offerings' in possible_relevant_genre:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['ritual'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['ritual'] += 1
            elif  'myth' in possible_relevant_genre or  'epic' in possible_relevant_genre or 'wisdom' in possible_relevant_genre or 'literary' in possible_relevant_genre or 'narrative' in possible_relevant_genre:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['narrative'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['narrative'] += 1
            elif  'hymn' in possible_relevant_genre or  'prayer' in possible_relevant_genre:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['hymn/prayer'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['hymn/prayer'] += 1
            elif 'incantation' in possible_relevant_genre or 'magic' in possible_relevant_genre or 'medical' in possible_relevant_genre:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['incantation/magic'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['incantation/magic'] += 1
            elif  'omen' in possible_relevant_genre or  'divination' in possible_relevant_genre or 'oracular' in possible_relevant_genre or 'divinatiory' in possible_relevant_genre:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['divination'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['divination'] += 1
            else:
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['other religious'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['other religious'] += 1

        elif is_in_clemens:
            if locations_et_religenre[general_area]:
                locations_et_religenre[general_area]['related'] += 1
            else:
                locations_et_religenre[general_area] = defaultdict(int)
                locations_et_religenre[general_area]['related'] += 1
        
        else:
            continue

    row_mockup = {'ritual': 0, 'narrative': 0, 'hymn/prayer': 0, 'divination': 0, 'incantation/magic': 0, 'other religious': 0, 'related': 0}
    row_mockup['total'] = 0

    out_dict = {'Other/unknown': row_mockup.copy()}
    locations_to_count_in = ['Royal Palace', 'House of Urtenu', 'House of Rapanu', 'House of Yabninu', 'Lamaštu', 'House of the Literary Tablets', 'Between Royal Palace and South Palace', 'House of the High Priest', 'House of Rašapabu', 'Literate’s House', 'House of the Hurrian Priest']
    
    for location in locations_et_religenre:
        print(location)
        if location in locations_to_count_in:
            row_data = row_mockup.copy()
            for religious in locations_et_religenre[location]:
                print('\t', religious, locations_et_religenre[location][religious])
                row_data[religious] = locations_et_religenre[location][religious]
                row_data['total'] += locations_et_religenre[location][religious]

            out_dict[location] = row_data
        else:
            row_data = out_dict['Other/unknown']
            for religious in locations_et_religenre[location]:
                print('\t', religious, locations_et_religenre[location][religious])
                row_data[religious] += locations_et_religenre[location][religious]

            out_dict['Other/unknown'] = row_data


    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'religious_detailed_stats.csv'), sep=',', encoding='utf-8')

In [26]:
get_religious_texts_detailed_stats()

Royal Palace
	 narrative 8
	 related 156
	 hymn/prayer 66
	 divination 49
	 ritual 7
	 other religious 11
	 incantation/magic 1
Literate’s House
	 narrative 2
	 related 1
	 incantation/magic 1
House of the Literary Tablets
	 narrative 8
	 incantation/magic 2
	 hymn/prayer 1
	 divination 3
	 related 7
Lamaštu
	 narrative 8
	 hymn/prayer 4
	 other religious 1
	 incantation/magic 9
	 divination 3
	 related 19
House of Urtenu
	 narrative 12
	 ritual 3
	 other religious 2
	 incantation/magic 5
	 hymn/prayer 1
	 divination 5
	 related 42
Lower City
	 other religious 1
	 related 4
Temple of Baal
	 other religious 1
	 related 1
nan
	 incantation/magic 1
	 other religious 3
	 ritual 6
	 narrative 1
	 divination 1
	 related 10
South Acropolis
	 other religious 2
	 narrative 3
	 hymn/prayer 1
	 divination 1
	 related 8
	 incantation/magic 2
City Centre
	 hymn/prayer 2
	 narrative 2
	 ritual 1
	 divination 1
	 related 1
House of the High Priest
	 other religious 16
	 ritual 20
	 related 28
	 hymn/

In [31]:
def religious_questioned():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_et_religenre = defaultdict(dict)
    religious = 0
    religious_possibly = 0
    only_clemens = 0

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        
        is_in_clemens = df.iloc[row_id]['Clemens 2001']
        if 'PRAVDA' == is_in_clemens:
            is_in_clemens = True
        else:
            is_in_clemens = False

        possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
        if type(possible_relevant_genre) == float:
            possible_relevant_genre = False

        ktu_num = str(df.iloc[row_id]['KTU3'])
        ktu_category = ktu_num.split('.')[0]

        if possible_relevant_genre:
            if '?' in possible_relevant_genre:
                religious_possibly += 1
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['religious?'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['religious?'] += 1
            else:
                religious += 1
                if locations_et_religenre[general_area]:
                    locations_et_religenre[general_area]['other religious'] += 1
                else:
                    locations_et_religenre[general_area] = defaultdict(int)
                    locations_et_religenre[general_area]['other religious'] += 1

        elif is_in_clemens:
            only_clemens += 1
            if locations_et_religenre[general_area]:
                locations_et_religenre[general_area]['related'] += 1
            else:
                locations_et_religenre[general_area] = defaultdict(int)
                locations_et_religenre[general_area]['related'] += 1
        
        else:
            continue

    print('religious:', religious)
    print('possibly:', religious_possibly)
    print('Only Clemens:', only_clemens)    

In [32]:
religious_questioned()

religious: 341
possibly: 75
Only Clemens: 360


In [36]:
def religious_languages():
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    locations_languages = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']

        languages = df.iloc[row_id]['Language']
        if ';' in str(languages):
            languages = 'multilingual'
        
        is_in_clemens = df.iloc[row_id]['Clemens 2001']
        if 'PRAVDA' == is_in_clemens:
            is_in_clemens = True
        else:
            is_in_clemens = False

        possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
        if type(possible_relevant_genre) == float:
            possible_relevant_genre = False

        ktu_num = str(df.iloc[row_id]['KTU3'])
        ktu_category = ktu_num.split('.')[0]

        if possible_relevant_genre:
            if locations_languages[general_area]:
                locations_languages[general_area][languages] += 1
            else:
                locations_languages[general_area] = defaultdict(int)
                locations_languages[general_area][languages] += 1

        elif is_in_clemens:
            if locations_languages[general_area]:
                locations_languages[general_area][f'related {languages}'] += 1
            else:
                locations_languages[general_area] = defaultdict(int)
                locations_languages[general_area][f'related {languages}'] += 1
        
        else:
            continue

    row_mockup = {'Ugaritic': 0, 'Akkadian': 0, 'Hurrian': 0, 'Sumerian': 0, 'multilingual': 0, 'related Ugaritic': 0, 'related Akkadian': 0, 'related Sumerian':0, 'related multilingual': 0}
    row_mockup['total'] = 0

    out_dict = {'Other/unknown': row_mockup.copy()}
    locations_to_count_in = ['Royal Palace', 'House of Urtenu', 'House of Rapanu', 'House of Yabninu', 'Lamaštu', 'House of the Literary Tablets', 'Between Royal Palace and South Palace', 'House of the High Priest', 'House of Rašapabu', 'Literate’s House', 'House of the Hurrian Priest']
    
    for location in locations_languages:
        print(location)
        if location in locations_to_count_in:
            row_data = row_mockup.copy()
            for lang in locations_languages[location]:
                print('\t', lang, locations_languages[location][lang])
                row_data[lang] = locations_languages[location][lang]
                row_data['total'] += locations_languages[location][lang]

            out_dict[location] = row_data
        else:
            row_data = out_dict['Other/unknown']
            for lang in locations_languages[location]:
                print('\t', lang, locations_languages[location][lang])
                row_data[lang] += locations_languages[location][lang]

            out_dict['Other/unknown'] = row_data


    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', 'religious_langs_stats.csv'), sep=',', encoding='utf-8')

In [37]:
religious_languages()

Royal Palace
	 multilingual 2
	 related Ugaritic 67
	 Hurrian 66
	 Ugaritic 74
	 related multilingual 7
	 related Akkadian 82
Literate’s House
	 Sumerian 1
	 Akkadian 2
	 related Akkadian 1
House of the Literary Tablets
	 multilingual 4
	 Akkadian 6
	 Ugaritic 4
	 related Akkadian 7
Lamaštu
	 multilingual 9
	 Akkadian 16
	 related Akkadian 18
	 related Ugaritic 1
House of Urtenu
	 Akkadian 19
	 multilingual 2
	 Hittite 2
	 Sumerian 1
	 Ugaritic 4
	 related Akkadian 38
	 related Ugaritic 3
	 related multilingual 1
Lower City
	 Sumerian 1
	 related Akkadian 3
	 related Ugaritic 1
Temple of Baal
	 Akkadian 1
	 related Akkadian 1
nan
	 multilingual 1
	 Hurrian 2
	 Ugaritic 8
	 Akkadian 1
	 related Akkadian 4
	 related Ugaritic 6
South Acropolis
	 multilingual 1
	 Akkadian 5
	 Sumerian 2
	 Ugaritic 1
	 related Akkadian 7
	 related Ugaritic 1
City Centre
	 Sumerian 1
	 Akkadian 4
	 Ugaritic 1
	 related Akkadian 1
House of the High Priest
	 Hurrian 16
	 Ugaritic 49
	 related Ugaritic 19
	 rel

In [48]:
ktu_classification = {
    '1': 'Literary and Religious',
    '2': 'Letters',
    '3': 'Legal and Juridical',
    '4': 'Economic',
    '5': 'Scribal Excercises',
    '6': 'Inscriptions',
    '7': 'Unclassified etc.',
    '8': 'Unclassified etc.',
    '9': 'Unclassified etc.'
}

def list_genres_in_cluster(cluster:str):
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    languages_et_religenre = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        if general_area == cluster:        
            language = df.iloc[row_id]['Language']
            if ';' in str(language):
                language = 'multilingual'
            
            is_in_clemens = df.iloc[row_id]['Clemens 2001']
            if 'PRAVDA' == is_in_clemens:
                is_in_clemens = True
                clemens_category = df.iloc[row_id]['Clemens 2001 type']
            else:
                is_in_clemens = False

            possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
            if type(possible_relevant_genre) == float:
                possible_relevant_genre = False

            ktu_num = str(df.iloc[row_id]['KTU3'])
            ktu_category = ktu_num.split('.')[0]

            if possible_relevant_genre:
                if 'ritual' in possible_relevant_genre or  'sacrifices' in possible_relevant_genre or 'offerings' in possible_relevant_genre:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language]['ritual'] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language]['ritual'] += 1
                elif  'myth' in possible_relevant_genre or  'epic' in possible_relevant_genre or 'wisdom' in possible_relevant_genre or 'literary' in possible_relevant_genre or 'narrative' in possible_relevant_genre:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language]['narrative'] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language]['narrative'] += 1
                elif  'hymn' in possible_relevant_genre or  'prayer' in possible_relevant_genre:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language]['hymn/prayer'] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language]['hymn/prayer'] += 1
                elif 'incantation' in possible_relevant_genre or 'magic' in possible_relevant_genre or 'medical' in possible_relevant_genre:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language]['incantation/magic'] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language]['incantation/magic'] += 1
                elif  'omen' in possible_relevant_genre or  'divination' in possible_relevant_genre or 'oracular' in possible_relevant_genre or 'divinatiory' in possible_relevant_genre:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language]['divination'] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language]['divination'] += 1
                else:
                    try:
                        ktu_category_classification = ktu_classification[ktu_category]
                        if languages_et_religenre[language]:
                            languages_et_religenre[language][ktu_category_classification] += 1
                        else:
                            languages_et_religenre[language] = defaultdict(int)
                            languages_et_religenre[language][ktu_category_classification] += 1
                    except:
                        if languages_et_religenre[language]:
                            languages_et_religenre[language]['other religious'] += 1
                        else:
                            languages_et_religenre[language] = defaultdict(int)
                            languages_et_religenre[language]['other religious'] += 1

            elif is_in_clemens:
                try:
                    ktu_category_classification = ktu_classification[ktu_category]
                    if languages_et_religenre[language]:
                        languages_et_religenre[language][f"related {ktu_category_classification.replace('?', '')}"] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language][f"related {ktu_category_classification.replace('?', '')}"] += 1
                except:
                    if languages_et_religenre[language]:
                        languages_et_religenre[language][f"related {ktu_category_classification.replace('?', '')}"] += 1
                    else:
                        languages_et_religenre[language] = defaultdict(int)
                        languages_et_religenre[language][f"related {ktu_category_classification.replace('?', '')}"] += 1

            
            else:
                continue
        else:
            continue

    # row_mockup = {'ritual': 0, 'narrative': 0, 'hymn/prayer': 0, 'divination': 0, 'incantation/magic': 0, 'other religious': 0, 'Clemens 2001': 0}
    row_mockup = {}
    for language in languages_et_religenre:
        for genre in languages_et_religenre[language]:
            row_mockup[genre] = 0
    row_mockup['total'] = 0

    out_dict = {}
    languages_to_count_in = [cluster]
    
    for language in languages_et_religenre:
        print(language)
        row_data = row_mockup.copy()
        for religious in languages_et_religenre[language]:
            print('\t', religious, languages_et_religenre[language][religious])
            row_data[religious] = languages_et_religenre[language][religious]
            row_data['total'] += languages_et_religenre[language][religious]

        out_dict[language] = row_data


    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', f'religious_detailed_stats_{cluster}.csv'), sep=',', encoding='utf-8')

In [49]:
list_genres_in_cluster('Royal Palace')
list_genres_in_cluster('House of the Hurrian Priest')
list_genres_in_cluster('House of the High Priest')

multilingual
	 narrative 1
	 related Economic 7
	 other religious 1
Ugaritic
	 related Economic 49
	 related Legal and Juridical 2
	 related Letters 12
	 divination 49
	 ritual 7
	 Economic 10
	 related Scribal Excercises 1
	 narrative 7
	 incantation/magic 1
	 related Inscriptions 2
	 related Unclassified etc. 1
Hurrian
	 hymn/prayer 66
Akkadian
	 related Economic 71
	 related Scribal Excercises 2
	 related Letters 1
	 related Inscriptions 8
Ugaritic
	 narrative 9
	 incantation/magic 3
	 hymn/prayer 3
	 Literary and Religious 4
	 ritual 21
	 divination 8
	 related Economic 3
	 related Unclassified etc. 13
	 Economic 1
multilingual
	 ritual 5
	 related Unclassified etc. 1
Hurrian
	 ritual 2
	 hymn/prayer 2
	 Literary and Religious 2
Akkadian
	 related Unclassified etc. 2
Hurrian
	 Literary and Religious 5
	 ritual 5
	 Unclassified etc. 3
	 hymn/prayer 3
Ugaritic
	 Inscriptions 5
	 Literary and Religious 2
	 related Unclassified etc. 8
	 ritual 15
	 related Economic 8
	 related Letters 

In [44]:
ktu_classification = {
    '1': 'other',
    '2': 'Letters',
    '3': 'Legal and Juridical',
    '4': 'Economic',
    '5': 'Scribal Excercises',
    '6': 'Inscriptions',
    '7': 'other',
    '8': 'other',
    '9': 'other'
}

def list_genres_in_cluster_deteiled_loc(cluster:str):
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    detailed_loc_et_religenre = defaultdict(dict)

    for row_id in df.index:
        general_area = df.iloc[row_id]['Archive/General area']
        if general_area == cluster:
            detailed_loc = df.iloc[row_id]['detail in general']
            
            is_in_clemens = df.iloc[row_id]['Clemens 2001']
            if 'PRAVDA' == is_in_clemens:
                is_in_clemens = True
                clemens_category = df.iloc[row_id]['Clemens 2001 type']
            else:
                is_in_clemens = False

            possible_relevant_genre = df.iloc[row_id]['possible relevant (religious) genres']
            if type(possible_relevant_genre) == float:
                possible_relevant_genre = False

            ktu_num = str(df.iloc[row_id]['KTU3'])
            ktu_category = ktu_num.split('.')[0]

            if possible_relevant_genre:
                if 'ritual' in possible_relevant_genre or  'sacrifices' in possible_relevant_genre or 'offerings' in possible_relevant_genre:
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc]['ritual'] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc]['ritual'] += 1
                elif  'myth' in possible_relevant_genre or  'epic' in possible_relevant_genre or 'wisdom' in possible_relevant_genre or 'literary' in possible_relevant_genre or 'narrative' in possible_relevant_genre:
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc]['narrative'] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc]['narrative'] += 1
                elif  'hymn' in possible_relevant_genre or  'prayer' in possible_relevant_genre:
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc]['hymn/prayer'] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc]['hymn/prayer'] += 1
                elif 'incantation' in possible_relevant_genre or 'magic' in possible_relevant_genre or 'medical' in possible_relevant_genre:
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc]['incantation/magic'] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc]['incantation/magic'] += 1
                elif  'omen' in possible_relevant_genre or  'divination' in possible_relevant_genre or 'oracular' in possible_relevant_genre or 'divinatiory' in possible_relevant_genre:
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc]['divination'] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc]['divination'] += 1
                else:
                    try:
                        ktu_category_classification = ktu_classification[ktu_category]
                        if detailed_loc_et_religenre[detailed_loc]:
                            detailed_loc_et_religenre[detailed_loc][ktu_category_classification] += 1
                        else:
                            detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                            detailed_loc_et_religenre[detailed_loc][ktu_category_classification] += 1
                    except:
                        if detailed_loc_et_religenre[detailed_loc]:
                            detailed_loc_et_religenre[detailed_loc]['other religious'] += 1
                        else:
                            detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                            detailed_loc_et_religenre[detailed_loc]['other religious'] += 1

            elif is_in_clemens:
                try:
                    ktu_category_classification = ktu_classification[ktu_category]
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc][f"related {ktu_category_classification.replace('?', '')}"] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc][f"related {ktu_category_classification.replace('?', '')}"] += 1
                except:
                    if clemens_category == 'Seal':
                        clemens_category = 'other'
                    elif clemens_category == 'Legal' or clemens_category == 'Treaty edict':
                        clemens_category = 'Legal and Juridical'
                    elif clemens_category == 'Letter':
                        clemens_category = 'Letters'
                    elif clemens_category == 'Administrative':
                        clemens_category = 'Economic'
                    else:
                        clemens_category = 'other'
                    
                    if detailed_loc_et_religenre[detailed_loc]:
                        detailed_loc_et_religenre[detailed_loc][f"related {clemens_category.replace('?', '')}"] += 1
                    else:
                        detailed_loc_et_religenre[detailed_loc] = defaultdict(int)
                        detailed_loc_et_religenre[detailed_loc][f"related {clemens_category.replace('?', '')}"] += 1

            
            else:
                continue
        else:
            continue

    # House of the High Priest mockup:
    # row_mockup = {'ritual': 0, 'narrative': 0, 'hymn/prayer': 0, 'incantation/magic': 0, 'Inscriptions': 0, 'Economic': 0, 'other': 0, 'Clemens 2001 Economic': 0, 'Clemens 2001 Letters': 0, 'Clemens 2001 other': 0, 'total': 0}

    # Royal Palace mockup:
    row_mockup = {'ritual': 0, 'narrative': 0, 'hymn/prayer': 0, 'divination': 0, 'incantation/magic': 0, 'other religious': 0, 'Economic': 0, 'related Economic': 0, 'related Legal and Juridical': 0, 'related Letters': 0, 'related Scribal Excercises': 0, 'related Inscriptions': 0, 'related other': 0, 'total': 0}
    # row_mockup = {}
    # for detailed_loc in detailed_loc_et_religenre:
    #     for genre in detailed_loc_et_religenre[detailed_loc]:
    #         row_mockup[genre] = 0
    # row_mockup['total'] = 0

    out_dict = {}
    
    for detailed_loc in detailed_loc_et_religenre:
        print(detailed_loc)
        row_data = row_mockup.copy()
        for religious in detailed_loc_et_religenre[detailed_loc]:
            print('\t', religious, detailed_loc_et_religenre[detailed_loc][religious])
            row_data[religious] = detailed_loc_et_religenre[detailed_loc][religious]
            row_data['total'] += detailed_loc_et_religenre[detailed_loc][religious]

        out_dict[detailed_loc] = row_data


    df = pd.DataFrame.from_dict(out_dict)
    df = df.transpose()
    df.to_csv(os.path.join(ROOT_PATH, 'statistics', f'religious_detailed_stats_inner_locs_{cluster}.csv'), sep=',', encoding='utf-8')

In [50]:
list_genres_in_cluster_deteiled_loc('Royal Palace')
list_genres_in_cluster_deteiled_loc('House of the Hurrian Priest')
list_genres_in_cluster_deteiled_loc('House of the High Priest')

Eastern archive
	 narrative 1
	 hymn/prayer 1
	 related Letters 16
	 related Economic 14
	 ritual 2
	 Economic 4
	 related Legal and Juridical 4
Western archive
	 related Economic 5
	 divination 1
	 related Letters 1
nan
	 related Legal and Juridical 11
	 related Economic 12
	 related Letters 5
	 hymn/prayer 2
	 ritual 2
	 related other 2
	 related Inscriptions 1
	 narrative 2
	 Economic 2
	 related Unclassified etc. 1
Southwestern archives
	 hymn/prayer 63
	 related Economic 9
	 ritual 2
	 narrative 4
	 Economic 1
	 related other 1
	 divination 47
	 related Legal and Juridical 1
Central archive
	 Economic 3
	 related Economic 7
	 related Legal and Juridical 16
	 related Letters 3
	 incantation/magic 1
	 related other 1
	 other religious 1
Room 73 archive
	 related Economic 5
	 related Letters 1
	 related Scribal Excercises 1
	 narrative 1
Room 90 archive
	 ritual 1
	 related Legal and Juridical 2
	 related Economic 2
Southern archive
	 related Inscriptions 1
	 related Legal and Juridi

In [46]:
""" Get basic data about an entry (excavation sigl, KTU) """

def get_basic_data(sigl:str):
    df = pd.read_csv(DATABASE, encoding='windows-1250', delimiter=';')

    if 'KTU' in sigl:
        search_in = 'KTU3'
        sigl = sigl.replace('KTU ', '')

    if 'RS' in sigl:
        search_in = 'Excavation Numbers'
        sigl = sigl.replace('RS ' ,'')

    if 'RIH' in sigl:
        search_in = 'Excavation Numbers'
        sigl = sigl.replace('RIH ' ,'')

    for row_id in df.index:
        if search_in == 'Excavation Numbers':
            sigl_numbers = eval(df.iloc[row_id][search_in])
            sigl_nums = []
            for ex in sigl_numbers:
                ex = ex.replace('[', '')
                ex = ex.replace(']', '')
                sigl_nums.append(ex)

        else:
            sigl_nums = [df.iloc[row_id][search_in]]

        if sigl in sigl_nums:
            languages = df.iloc[row_id]['Language']
            location = df.iloc[row_id]['Archive/General area']
            possible_relevant_religious_genre = df.iloc[row_id]['possible relevant (religious) genres']
            ex_sigls = df.iloc[row_id]['Excavation Numbers']
            ktu = df.iloc[row_id]['KTU3']

            return {'KTU': ktu, 'RS/RIH': ex_sigls, 'Language': languages, 'Location': location, 'religious genre': possible_relevant_religious_genre}

In [10]:
""" Extract data for mlk """

list_of_texts_to_process = ['KTU 1.39', 'KTU 1.41', 'KTU 1.43', 'KTU 1.46', 'KTU 1.87', 'KTU 1.90', 'KTU 1.91', 'KTU 1.103', 'KTU 1.105', 'KTU 1.106', 'KTU 1.109', 'KTU 1.111', 'KTU 1.112', 'KTU 1.115', 'KTU 1.119', 'KTU 1.126', 'KTU 1.132', 'KTU 1.139', 'KTU 1.148', 'KTU 1.161','KTU 1.164', 'KTU 1.168', 'KTU 1.173', 'KTU 1.163', 'KTU 1.171', 'KTU 1.123', 'KTU 1.47', 'KTU 1.118', 'KTU 1.170']
print(len(list_of_texts_to_process))

out_dict = {}
row_id = 0

for tablet in list_of_texts_to_process:
    tablet_data = get_basic_data(tablet)
    out_dict[row_id] = tablet_data
    row_id += 1

df = pd.DataFrame.from_dict(out_dict)
df = df.transpose()
df.to_csv(os.path.join(ROOT_PATH, 'statistics', f'data_for_mlk.csv'), sep=',', encoding='utf-8')
