In [4]:
import csv
import pandas as pd

# Prosodic weight of each vowel segment the scoring cares about.
# 'j' is kept in this table because it counts towards the prosodic weight too.
VOWEL_PROSODIC_WEIGHT_MAPPING = {
    'a': 10,
    'e': 9,
    'i': 8,
    'o': 9,
    'ɯ': 8,
    'u': 8,
    'j': 7,
}

# Prosodic weight of each consonant segment the scoring cares about.
# Segments missing from both tables carry no weight.
CONSONANT_PROSODIC_WEIGHT_MAPPING = {
    'ʔ': 1,
    'd': 2,
    's': 3,
    'ɡ': 2,
    'l': 6,
    'n': 5,
    'm': 5,
    'ŋ': 5,
    'b': 2,
    'w': 7,
    'ɢ': 2,
}

# Single lookup table covering vowels and consonants alike.
PROSODIC_WEIGHT_MAPPING = {
    **VOWEL_PROSODIC_WEIGHT_MAPPING,
    **CONSONANT_PROSODIC_WEIGHT_MAPPING,
}

# Marker that makes CalculateProsodicWeight count the preceding segment again.
LONG = 'ː'

def CalculateProsodicWeight(syllable):
    """Return the summed prosodic weight of one transcribed syllable.

    The syllable is walked one character at a time. Nothing is counted
    until the first character found in VOWEL_PROSODIC_WEIGHT_MAPPING; from
    that character onwards every character adds its weight from
    PROSODIC_WEIGHT_MAPPING. A LONG mark stands in for the character just
    before it, so that character is counted a second time. Characters that
    are in neither table are reported and add nothing.

    Progress messages are printed to stdout while the syllable is scored.

    Args:
        syllable: the transcription of a single syllable, as a string.

    Returns:
        The total weight as an int (0 when no vowel is present).
    """
    print(f'calculating prosodic weight for {syllable}')

    weight_sum = 0
    counting = False      # flips to True at the first vowel and stays there
    last_segment = None   # what a LONG mark will be replaced with

    for char in syllable:
        current = char
        if current in VOWEL_PROSODIC_WEIGHT_MAPPING:
            counting = True
        elif current == LONG:
            print(f'found {LONG} , repeating the previous segment {last_segment}')
            current = last_segment

        # remember the (possibly substituted) segment for a following LONG
        last_segment = current

        if not counting:
            continue

        contribution = PROSODIC_WEIGHT_MAPPING.get(current)
        if contribution is None:
            print(f'segment {current} is not in the mapping, ignoring')
        else:
            weight_sum += contribution

    print(f'total weight for {syllable} is {weight_sum}')

    return weight_sum

# Score every line of dic.txt and export one spreadsheet row per line:
# the line text in 'data', then one weight per syllable in 'v1', 'v2', ...
#
# The file holds IPA and CJK text, so the encoding is pinned to UTF-8 instead
# of being left to the platform default, and newline='' is passed because the
# csv module asks for it on files handed to csv.reader.
scored_rows = []
with open('dic.txt', encoding='utf-8', newline='') as csv_file:
    # example row: ['有驈有皇 ɢʷɯʔ ɢʷjid ɢʷɯʔ ɡʷaːŋ']
    for row in csv.reader(csv_file):
        # blank lines come back as [] (or a whitespace-only field);
        # there is nothing to score, so skip them instead of crashing
        if not row or not row[0].strip():
            continue

        # the first whitespace-separated token is the source text,
        # syllables start on index 1
        syllables = row[0].split()[1:]

        # keep the line text itself in 'data', not the list csv.reader wraps it in
        scored_rows.append(
            [row[0]] + [CalculateProsodicWeight(syllable) for syllable in syllables]
        )

# Always emit the four value columns v1..v4; add more only when some line
# carries more than four syllables, so such a line no longer aborts the run.
value_count = max([4] + [len(scored) - 1 for scored in scored_rows])
columns = ['data'] + [f'v{i}' for i in range(1, value_count + 1)]

# Lines with fewer syllables get empty trailing cells.
padded_rows = [
    scored + [None] * (len(columns) - len(scored)) for scored in scored_rows
]

# Build the frame once rather than growing it row by row with df.loc.
df = pd.DataFrame(padded_rows, columns=columns)

df.to_excel("output5.xlsx", sheet_name='Sheet_name_1')

calculating prosodic weight for pɯ
total weight for pɯ is 8
calculating prosodic weight for hŋeːnʔ
found ː , repeating the previous segment e
total weight for hŋeːnʔ is 24
calculating prosodic weight for pɯ
total weight for pɯ is 8
calculating prosodic weight for ɡljɯŋ']
segment ' is not in the mapping, ignoring
segment ] is not in the mapping, ignoring
total weight for ɡljɯŋ'] is 20
calculating prosodic weight for ŋaːlʔ
found ː , repeating the previous segment a
total weight for ŋaːlʔ is 27
calculating prosodic weight for ʔsaŋ
total weight for ʔsaŋ is 15
calculating prosodic weight for ŋaːlʔ
found ː , repeating the previous segment a
total weight for ŋaːlʔ is 27
calculating prosodic weight for qʰaŋʔ']
segment ' is not in the mapping, ignoring
segment ] is not in the mapping, ignoring
total weight for qʰaŋʔ'] is 16
calculating prosodic weight for ɢʷi
total weight for ɢʷi is 8
calculating prosodic weight for laŋ
total weight for laŋ is 15
calculating prosodic weight for ɢʷi
total weight