In [69]:
import lingpy
from lingpy import *
from lingpy.compare.sanity import average_coverage, mutual_coverage_subset, synonymy, mutual_coverage
from lingpy.compare.util import mutual_coverage_check

In [70]:
# http://lingpy.org
# 
# https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0170046
#
# https://academic.oup.com/jole/article/3/2/130/5050100?login=true

In [71]:
# load the wordlist

wl = Wordlist('arawa.tsv')

# count number of languages, number of rows, number of concepts
print("Wordlist has {0} languages and {1} concepts across {2} rows.".format(wl.width, wl.height, len(wl)))

Wordlist has 8 languages and 556 concepts across 2503 rows.


In [72]:
# load list of Doculects

whole_wl = wl.get_dict(concept='SUN')
languages = whole_wl.keys()

In [73]:
# get all indices for concept "hand", `row` refers to the concepts here, while `col` refers to languages

sun = wl.get_dict(row='SUN', entry='FORM')
# look for concept 'Anaconda' for all Doculects
for taxon in languages:
    print('{0:20}'.format(taxon), '  \t', ', '.join(sun[taxon]))



Arawa                  	 mahí
Banawa                 	 mai
Deni                   	 mahi
Jamamadi               	 mahi
Jarawara               	 bahi
Kulina                 	 mahi
Paumari                	 mahi
Sorowaha               	 masici


In [74]:
# Checking Coverage 1

# Check whether a given mutual coverage is fulfilled by the dataset.
# True, if coverage is fulfilled for all language pairs, False if otherwise.

for i in range(300, -1, -1):
    if mutual_coverage_check(wl, i):
        print("Minimal mutual coverage is at {0} concept pairs.".format(i))
        break


Minimal mutual coverage is at 22 concept pairs.


In [75]:
#Checking Coverage 2

# Compute maximal mutual coverage for all language in a wordlist.
# Number of languages for which the coverage could be found as well as a list of all pairings in which this coverage is possible. 
# The list contains the mutual coverage inside each pair and the list of languages.

count, results = mutual_coverage_subset(wl, 140)
coverage, languages = results[0]
print('Found {0} languages with an average mutual coverage of {1}.'.format(count, coverage))

# load the smaller word list
wl_short = Wordlist('arawa.tsv')

# print basic characteristics
print("The new word list has {0} languages ({1}) and {2} concepts across {3} words.".format(wl_short.width, ', '.join(languages),  wl_short.height, len(wl_short)))

Found 6 languages with an average mutual coverage of 219.
The new word list has 8 languages (Banawa, Deni, Jamamadi, Jarawara, Kulina, Sorowaha) and 556 concepts across 2503 words.


In [76]:
synonyms = synonymy(wl)
for (language, concept), count in sorted(
    synonyms.items(), key=lambda x: x[1], reverse=True):
    if count >= 4:
        print('{0:50}  {1:50}  {2}'.format(language, concept, count))

Deni                                                STRIKE OR BEAT                                      5
Kulina                                              STRIKE OR BEAT                                      5
Paumari                                             STRIKE OR BEAT                                      5
Sorowaha                                            WHITE-LIPPED PECCARY                                4
Deni                                                ANT                                                 4
Jarawara                                            ANT                                                 4
Kulina                                              ANT                                                 4
Deni                                                OLD                                                 4
Jarawara                                            PALM TREE                                           4
Kulina                                        

In [77]:
#Part2
#  Cognate Detection

lex = LexStat('arawa.tsv', segments='tokens', check=True)

# run the dolgopolsky (turchin) analysis, which is threshold-free
lex.cluster(method='turchin')

# show the cognate sets, stored in "turchinid" for the words for "agouti"
sun = lex.get_dict(row='SUN') # get a dictionary with language as key for concept "eight"

for k, v in sun.items():
    if v: 
        #print(v[0])
        idx = v[0] # index of the word, it gives us access to all data
        print("{0:20} \t {1} \t{2}".format(lex[idx, 'doculect'], lex[idx, 'value'], lex[idx, 'turchinid']))

2022-03-15 10:45:24,164 [INFO] No obvious errors found in the data.
SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:45:24,511 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:45:24,513 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:45:24,515 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:45:24,517 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:45:24,520 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:45:24,524 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:45:24,529 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:45:24,533 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:45:24,551 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:45:24,552 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:45:24,555 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:45:24,557 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:45:24,558 [INFO] Analyzing 

2022-03-15 10:45:24,886 [INFO] Analyzing words for concept <DIRTY>.
2022-03-15 10:45:24,893 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:45:24,895 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:45:24,898 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:45:24,900 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:45:24,902 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:45:24,904 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:45:24,906 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:45:24,907 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:45:24,911 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:45:24,912 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:45:24,913 [INFO] Analyzing words for concept <DUST>.
2022-03-15 10:45:24,914 [INFO] Analyzing words for concept <EAGLE OR HAWK>.
2022-03-15 10:45:24,923 [INFO] Analyzing words for concept <EAR>.
2022-03-15 10:45:24,927 [INFO] Analyzing word

2022-03-15 10:45:25,181 [INFO] Analyzing words for concept <ISLAND>.
2022-03-15 10:45:25,183 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:45:25,184 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:45:25,188 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:45:25,195 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:45:25,201 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:45:25,202 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:45:25,205 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:45:25,206 [INFO] Analyzing words for concept <KNEE>.
2022-03-15 10:45:25,210 [INFO] Analyzing words for concept <KNOW>.
2022-03-15 10:45:25,214 [INFO] Analyzing words for concept <LAKE>.
2022-03-15 10:45:25,217 [INFO] Analyzing words for concept <LARGE CATFISH>.
2022-03-15 10:45:25,218 [INFO] Analyzing words for concept <LARGE-BULLET-ANT (subfamily paraponerinae)>.
2022-03-15 10:45:25,219 [INFO] Analyzing words for 

2022-03-15 10:45:25,507 [INFO] Analyzing words for concept <PIERCE>.
2022-03-15 10:45:25,516 [INFO] Analyzing words for concept <PINEAPPLE>.
2022-03-15 10:45:25,518 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:45:25,520 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:45:25,521 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:45:25,522 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:45:25,523 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15 10:45:25,533 [INFO] Analyzing words for concept <POISON>.
2022-03-15 10:45:25,535 [INFO] Analyzing words for concept <POLISH (SOMETHING)>.
2022-03-15 10:45:25,536 [INFO] Analyzing words for concept <PORCUPINE>.
2022-03-15 10:45:25,537 [INFO] Analyzing words for concept <POT>.
2022-03-15 10:45:25,540 [INFO] Analyzing words for concept <POUND TO FLATTEN>.
2022-03-15 10:45:25,542 [INFO] Analyzing words for concept <PRECIOUS>.
2022-03-15 10:45:25,547 [INFO] Analyzing words for concept <PREGNANT>.

2022-03-15 10:45:25,807 [INFO] Analyzing words for concept <SUCK>.
2022-03-15 10:45:25,809 [INFO] Analyzing words for concept <SUN>.
2022-03-15 10:45:25,815 [INFO] Analyzing words for concept <SWEAT>.
2022-03-15 10:45:25,819 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
2022-03-15 10:45:25,822 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:45:25,824 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:45:25,830 [INFO] Analyzing words for concept <SWIM>.
2022-03-15 10:45:25,831 [INFO] Analyzing words for concept <TAIL>.
2022-03-15 10:45:25,833 [INFO] Analyzing words for concept <TAKE>.
2022-03-15 10:45:25,835 [INFO] Analyzing words for concept <TAPIR>.
2022-03-15 10:45:25,837 [INFO] Analyzing words for concept <TASTE (SOMETHING)>.
2022-03-15 10:45:25,838 [INFO] Analyzing words for concept <TASTY>.
2022-03-15 10:45:25,839 [INFO] Analyzing words for concept <TEACH>.
2022-03-15 10:45:25,839 [INFO] Analyzing words for concept <TELL>.
2022-03-15 10:45:25,841 

Arawa                	 mahí 	1594
Banawa               	 mai 	1595
Deni                 	 mahi 	1594
Jamamadi             	 mahi 	1594
Jarawara             	 bahi 	1598
Kulina               	 mahi 	1594
Paumari              	 mahi 	1594
Sorowaha             	 masici 	1601


In [78]:
#average word length per taxon or in aggregated form (diversity/sounds)

most_freq_wlength = lex.get_frequencies(ftype='wordlength', ref='tokens', aggregated=False)
dict(sorted(most_freq_wlength.items(), key=lambda item: item[1], reverse=True))

{'Arawa': 5.305555555555555,
 'Sorowaha': 5.294685990338165,
 'Paumari': 4.973484848484849,
 'Deni': 4.829896907216495,
 'Kulina': 4.576732673267327,
 'Jarawara': 4.43855421686747,
 'Jamamadi': 4.4,
 'Banawa': 4.318493150684931}

In [79]:
# ?????
most_freq_sounds = lex.get_frequencies(ftype='diversity', ref='cogids', aggregated=False)
print("Cognate diversity of cogids: " + str(most_freq_sounds))

Cognate diversity of cogids: 0.16589625064201335


In [80]:
most_freq_sounds = lex.get_frequencies(ftype='diversity', ref='cogid', aggregated=False)
print("Cognate diversity: " + str(most_freq_sounds))

Cognate diversity: 0.18746789933230612


In [81]:
# Cognates turchin

lex.cluster(method='turchin', ref='turchinid', check=True)
lex.cluster(method="edit-dist", threshold=0.55, ref='editid')

# show the cognate sets, stored in "turchinid" for the words for "sun"
sun = lex.get_dict(row='SUN') # get a dictionary with language as key for concept "sun"

for k, v in sun.items():
    if v:
        idx = v[0] # index of the word, it gives us access to all data
        print("{0:20} \t {1} \t{2}\t {3}".format(lex[idx, 'doculect'], lex[idx, 'value'], 
                                             lex[idx, 'turchinid'],
                                            lex[idx, 'editid']))

SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:45:48,269 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:45:48,270 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:45:48,272 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:45:48,274 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:45:48,277 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:45:48,280 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:45:48,283 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:45:48,287 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:45:48,308 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:45:48,309 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:45:48,312 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:45:48,313 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:45:48,314 [INFO] Analyzing words for concept <ARACUÃ>.
2022-03-15 10:45:48,315 [INFO] Analyzing

2022-03-15 10:45:48,602 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:45:48,603 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:45:48,605 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:45:48,607 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:45:48,608 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:45:48,610 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:45:48,612 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:45:48,612 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:45:48,616 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:45:48,617 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:45:48,618 [INFO] Analyzing words for concept <DUST>.
2022-03-15 10:45:48,619 [INFO] Analyzing words for concept <EAGLE OR HAWK>.
2022-03-15 10:45:48,629 [INFO] Analyzing words for concept <EAR>.
2022-03-15 10:45:48,633 [INFO] Analyzing words for concept <EARTH (SOIL)>.
2022-03-15 10:45:48,635 [INFO] Analyzi

2022-03-15 10:45:48,902 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:45:48,904 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:45:48,907 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:45:48,909 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:45:48,911 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:45:48,912 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:45:48,914 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:45:48,915 [INFO] Analyzing words for concept <KNEE>.
2022-03-15 10:45:48,921 [INFO] Analyzing words for concept <KNOW>.
2022-03-15 10:45:48,922 [INFO] Analyzing words for concept <LAKE>.
2022-03-15 10:45:48,925 [INFO] Analyzing words for concept <LARGE CATFISH>.
2022-03-15 10:45:48,926 [INFO] Analyzing words for concept <LARGE-BULLET-ANT (subfamily paraponerinae)>.
2022-03-15 10:45:48,929 [INFO] Analyzing words for concept <LARVA>.
2022-03-15 10:45:48,932 [INFO] Analyzing words for c

2022-03-15 10:45:49,172 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:45:49,175 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:45:49,178 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:45:49,179 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:45:49,180 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15 10:45:49,181 [INFO] Analyzing words for concept <POISON>.
2022-03-15 10:45:49,183 [INFO] Analyzing words for concept <POLISH (SOMETHING)>.
2022-03-15 10:45:49,185 [INFO] Analyzing words for concept <PORCUPINE>.
2022-03-15 10:45:49,188 [INFO] Analyzing words for concept <POT>.
SEQUENCE CLUSTERING:  63%|██████▎   | 351/556 [00:00<00:00, 411.23it/s]2022-03-15 10:45:49,192 [INFO] Analyzing words for concept <POUND TO FLATTEN>.
2022-03-15 10:45:49,194 [INFO] Analyzing words for concept <PRECIOUS>.
2022-03-15 10:45:49,195 [INFO] Analyzing words for concept <PREGNANT>.
2022-03-15 10:45:49,196 [INFO] Analyzing words for concept <PRETEND>.

2022-03-15 10:45:49,482 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
2022-03-15 10:45:49,485 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:45:49,487 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:45:49,488 [INFO] Analyzing words for concept <SWIM>.
2022-03-15 10:45:49,489 [INFO] Analyzing words for concept <TAIL>.
2022-03-15 10:45:49,491 [INFO] Analyzing words for concept <TAKE>.
2022-03-15 10:45:49,492 [INFO] Analyzing words for concept <TAPIR>.
2022-03-15 10:45:49,495 [INFO] Analyzing words for concept <TASTE (SOMETHING)>.
2022-03-15 10:45:49,495 [INFO] Analyzing words for concept <TASTY>.
2022-03-15 10:45:49,496 [INFO] Analyzing words for concept <TEACH>.
2022-03-15 10:45:49,543 [INFO] Analyzing words for concept <TELL>.
2022-03-15 10:45:49,545 [INFO] Analyzing words for concept <TERMITE>.
2022-03-15 10:45:49,546 [INFO] Analyzing words for concept <TERMITE NEST>.
2022-03-15 10:45:49,548 [INFO] Analyzing words for concept <THAT>.
2022-03-15 10

Column <turchinid> already exists, do you want to override? [y/N] y


SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:45:51,297 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:45:51,298 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:45:51,307 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:45:51,310 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:45:51,313 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:45:51,316 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:45:51,323 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:45:51,327 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:45:51,346 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:45:51,347 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:45:51,351 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:45:51,356 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:45:51,358 [INFO] Analyzing words for concept <ARACUÃ>.
2022-03-15 10:45:51,359 [INFO] Analyzing

2022-03-15 10:45:51,594 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:45:51,596 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:45:51,598 [INFO] Analyzing words for concept <DOVE>.
SEQUENCE CLUSTERING:  21%|██        | 117/556 [00:00<00:01, 406.78it/s]2022-03-15 10:45:51,606 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:45:51,608 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:45:51,609 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:45:51,610 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:45:51,614 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:45:51,615 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:45:51,617 [INFO] Analyzing words for concept <DUST>.
2022-03-15 10:45:51,618 [INFO] Analyzing words for concept <EAGLE OR HAWK>.
2022-03-15 10:45:51,626 [INFO] Analyzing words for concept <EAR>.
2022-03-15 10:45:51,628 [INFO] Analyzing words for concept <EARTH (SOIL)>.
2022-03-15 10:45:51,630 [INFO] Analyzin

2022-03-15 10:45:51,864 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:45:51,868 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:45:51,869 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:45:51,872 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:45:51,873 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:45:51,875 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:45:51,876 [INFO] Analyzing words for concept <KNEE>.
2022-03-15 10:45:51,879 [INFO] Analyzing words for concept <KNOW>.
2022-03-15 10:45:51,880 [INFO] Analyzing words for concept <LAKE>.
2022-03-15 10:45:51,882 [INFO] Analyzing words for concept <LARGE CATFISH>.
2022-03-15 10:45:51,885 [INFO] Analyzing words for concept <LARGE-BULLET-ANT (subfamily paraponerinae)>.
2022-03-15 10:45:51,886 [INFO] Analyzing words for concept <LARVA>.
2022-03-15 10:45:51,888 [INFO] Analyzing words for concept <LATE>.
2022-03-15 10:45:51,889 [INFO] Analyzing words for concep

2022-03-15 10:45:52,127 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:45:52,128 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:45:52,130 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:45:52,131 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:45:52,133 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15 10:45:52,134 [INFO] Analyzing words for concept <POISON>.
2022-03-15 10:45:52,135 [INFO] Analyzing words for concept <POLISH (SOMETHING)>.
2022-03-15 10:45:52,136 [INFO] Analyzing words for concept <PORCUPINE>.
2022-03-15 10:45:52,138 [INFO] Analyzing words for concept <POT>.
2022-03-15 10:45:52,141 [INFO] Analyzing words for concept <POUND TO FLATTEN>.
2022-03-15 10:45:52,143 [INFO] Analyzing words for concept <PRECIOUS>.
2022-03-15 10:45:52,145 [INFO] Analyzing words for concept <PREGNANT>.
2022-03-15 10:45:52,149 [INFO] Analyzing words for concept <PRETEND>.
2022-03-15 10:45:52,156 [INFO] Analyzing words for concept <PROCREATE>

2022-03-15 10:45:52,421 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:45:52,423 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:45:52,425 [INFO] Analyzing words for concept <SWIM>.
2022-03-15 10:45:52,426 [INFO] Analyzing words for concept <TAIL>.
2022-03-15 10:45:52,428 [INFO] Analyzing words for concept <TAKE>.
2022-03-15 10:45:52,430 [INFO] Analyzing words for concept <TAPIR>.
2022-03-15 10:45:52,431 [INFO] Analyzing words for concept <TASTE (SOMETHING)>.
2022-03-15 10:45:52,432 [INFO] Analyzing words for concept <TASTY>.
2022-03-15 10:45:52,435 [INFO] Analyzing words for concept <TEACH>.
2022-03-15 10:45:52,436 [INFO] Analyzing words for concept <TELL>.
2022-03-15 10:45:52,437 [INFO] Analyzing words for concept <TERMITE>.
2022-03-15 10:45:52,439 [INFO] Analyzing words for concept <TERMITE NEST>.
2022-03-15 10:45:52,440 [INFO] Analyzing words for concept <THAT>.
2022-03-15 10:45:52,441 [INFO] Analyzing words for concept <THERE>.
2022-03-15 10:45:52,442 [

Arawa                	 mahí 	1594	 1411
Banawa               	 mai 	1595	 1411
Deni                 	 mahi 	1594	 1411
Jamamadi             	 mahi 	1594	 1411
Jarawara             	 bahi 	1598	 1411
Kulina               	 mahi 	1594	 1411
Paumari              	 mahi 	1594	 1411
Sorowaha             	 masici 	1601	 1411




In [82]:
 # Cognates sca

lex.cluster(method="sca", threshold=0.45, ref='scaid')


for k, v in sun.items():
    if v:
        idx = v[0] 
        print("{0:20} \t {1} \t{2} \t {3} \t {4}".format(
        lex[idx, 'doculect'], 
        lex[idx, 'value'], 
        lex[idx, 'turchinid'], 
        lex[idx, 'scaid'],
        lex[idx, 'editid']))

SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:45:56,518 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:45:56,520 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:45:56,524 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:45:56,527 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:45:56,532 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:45:56,542 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:45:56,545 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:45:56,550 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:45:56,580 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:45:56,581 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:45:56,591 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:45:56,593 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:45:56,595 [INFO] Analyzing words for concept <ARACUÃ>.
2022-03-15 10:45:56,598 [INFO] Analyzing

2022-03-15 10:45:57,029 [INFO] Analyzing words for concept <DIRTY>.
2022-03-15 10:45:57,032 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:45:57,033 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:45:57,036 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:45:57,039 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:45:57,043 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:45:57,046 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:45:57,048 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:45:57,050 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:45:57,056 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:45:57,060 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:45:57,061 [INFO] Analyzing words for concept <DUST>.
2022-03-15 10:45:57,067 [INFO] Analyzing words for concept <EAGLE OR HAWK>.
2022-03-15 10:45:57,092 [INFO] Analyzing words for concept <EAR>.
SEQUENCE CLUSTERING:  23%|██▎       | 126/556

2022-03-15 10:45:57,498 [INFO] Analyzing words for concept <INSIDE>.
2022-03-15 10:45:57,505 [INFO] Analyzing words for concept <ISLAND>.
2022-03-15 10:45:57,506 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:45:57,509 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:45:57,514 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:45:57,516 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:45:57,518 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:45:57,527 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:45:57,530 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:45:57,533 [INFO] Analyzing words for concept <KNEE>.
SEQUENCE CLUSTERING:  42%|████▏     | 236/556 [00:01<00:01, 250.08it/s]2022-03-15 10:45:57,545 [INFO] Analyzing words for concept <KNOW>.
2022-03-15 10:45:57,549 [INFO] Analyzing words for concept <LAKE>.
2022-03-15 10:45:57,553 [INFO] Analyzing words for concept <LARGE CATFISH>.
2022-03-15 10:45

2022-03-15 10:45:57,902 [INFO] Analyzing words for concept <PIECE>.
2022-03-15 10:45:57,905 [INFO] Analyzing words for concept <PIED-CRESTED OROPENDOLA>.
2022-03-15 10:45:57,907 [INFO] Analyzing words for concept <PIERCE>.
2022-03-15 10:45:57,913 [INFO] Analyzing words for concept <PINEAPPLE>.
2022-03-15 10:45:57,916 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:45:57,917 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:45:57,918 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:45:57,920 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:45:57,922 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15 10:45:57,923 [INFO] Analyzing words for concept <POISON>.
2022-03-15 10:45:57,925 [INFO] Analyzing words for concept <POLISH (SOMETHING)>.
2022-03-15 10:45:57,926 [INFO] Analyzing words for concept <PORCUPINE>.
2022-03-15 10:45:57,928 [INFO] Analyzing words for concept <POT>.
2022-03-15 10:45:57,933 [INFO] Analyzing words for concept <POUND 

2022-03-15 10:45:58,243 [INFO] Analyzing words for concept <STRIKE OR BEAT>.
2022-03-15 10:45:58,286 [INFO] Analyzing words for concept <STRONG TASTE>.
2022-03-15 10:45:58,288 [INFO] Analyzing words for concept <SUCK>.
2022-03-15 10:45:58,295 [INFO] Analyzing words for concept <SUN>.
2022-03-15 10:45:58,299 [INFO] Analyzing words for concept <SWEAT>.
2022-03-15 10:45:58,300 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
2022-03-15 10:45:58,305 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:45:58,308 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:45:58,309 [INFO] Analyzing words for concept <SWIM>.
2022-03-15 10:45:58,311 [INFO] Analyzing words for concept <TAIL>.
SEQUENCE CLUSTERING:  83%|████████▎ | 461/556 [00:01<00:00, 277.87it/s]2022-03-15 10:45:58,315 [INFO] Analyzing words for concept <TAKE>.
2022-03-15 10:45:58,317 [INFO] Analyzing words for concept <TAPIR>.
2022-03-15 10:45:58,322 [INFO] Analyzing words for concept <TASTE (SOMETHING)>.
2022

Arawa                	 mahí 	1594 	 1292 	 1411
Banawa               	 mai 	1595 	 1292 	 1411
Deni                 	 mahi 	1594 	 1292 	 1411
Jamamadi             	 mahi 	1594 	 1292 	 1411
Jarawara             	 bahi 	1598 	 1292 	 1411
Kulina               	 mahi 	1594 	 1292 	 1411
Paumari              	 mahi 	1594 	 1292 	 1411
Sorowaha             	 masici 	1601 	 1292 	 1411


In [83]:
lex.get_scorer(runs=10000)
lex.cluster(method="lexstat", threshold=0.55, ref="infomap", cluster_method='infomap')


for k, v in sun.items():
    if v:
        idx = v[0] 
        force=True
        print("{0:30} \t {1} \t{2} \t {3} \t {4} \t {5}".format(
            lex[idx, 'doculect'], 
            lex[idx, 'value'], 
            lex[idx, 'turchinid'], 
            lex[idx, 'scaid'],
            lex[idx, 'lexstatid'],
            lex[idx, 'infomap']))

CORRESPONDENCE CALCULATION:   0%|          | 0/32.0 [00:00<?, ?it/s]2022-03-15 10:46:07,319 [INFO] Calculating alignments for pair Arawa / Arawa.
2022-03-15 10:46:07,328 [INFO] Calculating alignments for pair Arawa / Banawa.
2022-03-15 10:46:07,334 [INFO] Calculating alignments for pair Arawa / Deni.
2022-03-15 10:46:07,344 [INFO] Calculating alignments for pair Arawa / Jamamadi.
2022-03-15 10:46:07,351 [INFO] Calculating alignments for pair Arawa / Jarawara.
2022-03-15 10:46:07,360 [INFO] Calculating alignments for pair Arawa / Kulina.
2022-03-15 10:46:07,374 [INFO] Calculating alignments for pair Arawa / Paumari.
2022-03-15 10:46:07,382 [INFO] Calculating alignments for pair Arawa / Sorowaha.
2022-03-15 10:46:07,392 [INFO] Calculating alignments for pair Banawa / Banawa.
CORRESPONDENCE CALCULATION:  31%|███▏      | 10/32.0 [00:00<00:00, 70.17it/s]2022-03-15 10:46:07,462 [INFO] Calculating alignments for pair Banawa / Deni.
2022-03-15 10:46:07,528 [INFO] Calculating alignments for pai

RANDOM CORRESPONDENCE CALCULATION:  88%|████████▊ | 28/32.0 [00:39<00:08,  2.14s/it]2022-03-15 10:46:48,515 [INFO] Calculating random alignmentsfor pair Jarawara/Kulina.
RANDOM CORRESPONDENCE CALCULATION:  91%|█████████ | 29/32.0 [00:40<00:05,  1.99s/it]2022-03-15 10:46:50,149 [INFO] Calculating random alignmentsfor pair Jarawara/Paumari.
RANDOM CORRESPONDENCE CALCULATION:  94%|█████████▍| 30/32.0 [00:42<00:03,  1.94s/it]2022-03-15 10:46:51,986 [INFO] Calculating random alignmentsfor pair Jarawara/Sorowaha.
RANDOM CORRESPONDENCE CALCULATION:  97%|█████████▋| 31/32.0 [00:44<00:01,  1.90s/it]2022-03-15 10:46:53,783 [INFO] Calculating random alignmentsfor pair Kulina/Kulina.
RANDOM CORRESPONDENCE CALCULATION: 100%|██████████| 32/32.0 [00:46<00:00,  1.89s/it]2022-03-15 10:46:55,658 [INFO] Calculating random alignmentsfor pair Kulina/Paumari.
RANDOM CORRESPONDENCE CALCULATION: 33it [00:48,  1.90s/it]                          2022-03-15 10:46:57,578 [INFO] Calculating random alignmentsfor pa

2022-03-15 10:47:06,497 [INFO] Analyzing words for concept <COCKROACH>.
2022-03-15 10:47:06,497 [INFO] Analyzing words for concept <COLD>.
2022-03-15 10:47:06,508 [INFO] Analyzing words for concept <COMB>.
2022-03-15 10:47:06,510 [INFO] Analyzing words for concept <COME>.
2022-03-15 10:47:06,512 [INFO] Analyzing words for concept <COOK (SOMETHING)>.
2022-03-15 10:47:06,515 [INFO] Analyzing words for concept <CORPSE>.
2022-03-15 10:47:06,516 [INFO] Analyzing words for concept <CORRECT (RIGHT)>.
2022-03-15 10:47:06,517 [INFO] Analyzing words for concept <COTTON>.
2022-03-15 10:47:06,520 [INFO] Analyzing words for concept <COUGH>.
2022-03-15 10:47:06,525 [INFO] Analyzing words for concept <CRAB>.
2022-03-15 10:47:06,527 [INFO] Analyzing words for concept <CRICKET>.
2022-03-15 10:47:06,528 [INFO] Analyzing words for concept <CRY>.
2022-03-15 10:47:06,531 [INFO] Analyzing words for concept <CURASSOW>.
2022-03-15 10:47:06,532 [INFO] Analyzing words for concept <CUT>.
SEQUENCE CLUSTERING:  19

2022-03-15 10:47:07,048 [INFO] Analyzing words for concept <HOLD OR TAKE>.
2022-03-15 10:47:07,050 [INFO] Analyzing words for concept <HOLE>.
2022-03-15 10:47:07,055 [INFO] Analyzing words for concept <HOME>.
2022-03-15 10:47:07,058 [INFO] Analyzing words for concept <HOOK>.
2022-03-15 10:47:07,059 [INFO] Analyzing words for concept <HOPLIAS (GENUS)>.
2022-03-15 10:47:07,060 [INFO] Analyzing words for concept <HORN (ANATOMY)>.
2022-03-15 10:47:07,065 [INFO] Analyzing words for concept <HORSEFLY>.
2022-03-15 10:47:07,069 [INFO] Analyzing words for concept <HOT>.
2022-03-15 10:47:07,070 [INFO] Analyzing words for concept <HOUSE>.
2022-03-15 10:47:07,072 [INFO] Analyzing words for concept <HOUSE GIRDER>.
2022-03-15 10:47:07,074 [INFO] Analyzing words for concept <HOWLER MONKEY>.
2022-03-15 10:47:07,079 [INFO] Analyzing words for concept <HUNGER>.
2022-03-15 10:47:07,082 [INFO] Analyzing words for concept <HUSBAND>.
2022-03-15 10:47:07,088 [INFO] Analyzing words for concept <I>.
2022-03-15

2022-03-15 10:47:07,548 [INFO] Analyzing words for concept <PADDLE>.
2022-03-15 10:47:07,552 [INFO] Analyzing words for concept <PAIN>.
2022-03-15 10:47:07,556 [INFO] Analyzing words for concept <PALM SPECIES WITH THORNS>.
2022-03-15 10:47:07,564 [INFO] Analyzing words for concept <PALM TREE>.
2022-03-15 10:47:07,584 [INFO] Analyzing words for concept <PAN>.
2022-03-15 10:47:07,585 [INFO] Analyzing words for concept <PAPAYA>.
2022-03-15 10:47:07,586 [INFO] Analyzing words for concept <PASSION FRUIT>.
2022-03-15 10:47:07,589 [INFO] Analyzing words for concept <PATAUÁ PALM>.
2022-03-15 10:47:07,593 [INFO] Analyzing words for concept <PATERNAL AUNT (FATHER'S SISTER)>.
2022-03-15 10:47:07,612 [INFO] Analyzing words for concept <PATERNAL UNCLE (FATHER'S BROTHER)>.
2022-03-15 10:47:07,615 [INFO] Analyzing words for concept <PATH OR ROAD>.
2022-03-15 10:47:07,628 [INFO] Analyzing words for concept <PATTERN>.
2022-03-15 10:47:07,631 [INFO] Analyzing words for concept <PAU-MULATO>.
2022-03-15 1

2022-03-15 10:47:08,164 [INFO] Analyzing words for concept <SOUND OR NOISE>.
2022-03-15 10:47:08,165 [INFO] Analyzing words for concept <SOUR>.
2022-03-15 10:47:08,169 [INFO] Analyzing words for concept <SPIDER>.
2022-03-15 10:47:08,190 [INFO] Analyzing words for concept <SPIDER MONKEY>.
2022-03-15 10:47:08,196 [INFO] Analyzing words for concept <SPILL (SOMETHING)>.
SEQUENCE CLUSTERING:  78%|███████▊  | 433/556 [00:02<00:00, 166.77it/s]2022-03-15 10:47:08,204 [INFO] Analyzing words for concept <SPIRIT>.
2022-03-15 10:47:08,205 [INFO] Analyzing words for concept <SPIT>.
2022-03-15 10:47:08,207 [INFO] Analyzing words for concept <SPLIT>.
2022-03-15 10:47:08,208 [INFO] Analyzing words for concept <SPREAD OUT>.
2022-03-15 10:47:08,210 [INFO] Analyzing words for concept <SQUEEZE>.
2022-03-15 10:47:08,214 [INFO] Analyzing words for concept <SQUIRREL>.
2022-03-15 10:47:08,219 [INFO] Analyzing words for concept <SQUIRREL MONKEY>.
2022-03-15 10:47:08,221 [INFO] Analyzing words for concept <STAN

2022-03-15 10:47:08,714 [INFO] Analyzing words for concept <WIND>.
2022-03-15 10:47:08,716 [INFO] Analyzing words for concept <WINDOW>.
2022-03-15 10:47:08,718 [INFO] Analyzing words for concept <WING>.
2022-03-15 10:47:08,720 [INFO] Analyzing words for concept <WITH>.
2022-03-15 10:47:08,722 [INFO] Analyzing words for concept <WOMAN>.
2022-03-15 10:47:08,730 [INFO] Analyzing words for concept <WOODPECKER>.
2022-03-15 10:47:08,734 [INFO] Analyzing words for concept <WOUND>.
2022-03-15 10:47:08,737 [INFO] Analyzing words for concept <YAM>.
SEQUENCE CLUSTERING:  99%|█████████▊| 548/556 [00:02<00:00, 199.64it/s]2022-03-15 10:47:08,748 [INFO] Analyzing words for concept <YAWN>.
2022-03-15 10:47:08,750 [INFO] Analyzing words for concept <YEAR>.
2022-03-15 10:47:08,752 [INFO] Analyzing words for concept <YELLOW>.
2022-03-15 10:47:08,753 [INFO] Analyzing words for concept <YES>.
2022-03-15 10:47:08,756 [INFO] Analyzing words for concept <YOU>.
2022-03-15 10:47:08,758 [INFO] Analyzing words fo

Arawa                          	 mahí 	1594 	 1292 	 None 	 816
Banawa                         	 mai 	1595 	 1292 	 None 	 816
Deni                           	 mahi 	1594 	 1292 	 None 	 816
Jamamadi                       	 mahi 	1594 	 1292 	 None 	 816
Jarawara                       	 bahi 	1598 	 1292 	 None 	 816
Kulina                         	 mahi 	1594 	 1292 	 None 	 816
Paumari                        	 mahi 	1594 	 1292 	 None 	 816
Sorowaha                       	 masici 	1601 	 1292 	 None 	 817


In [85]:
lex.get_scorer(runs=10000)
lex.cluster(method='lexstat', threshold=0.60, ref='lexstatid')

lex.get_scorer(runs=10000)
lex.cluster(method="lexstat", threshold=0.55, ref="infomap", cluster_method='infomap')


for k, v in sun.items():
    if v:
        idx = v[0] 
        print("{0:20} \t {1} \t{2} \t {3} \t {4} \t {5}\t {6}".format(
            lex[idx, 'doculect'], 
            lex[idx, 'value'], 
            lex[idx, 'turchinid'], 
            lex[idx, 'scaid'],
            lex[idx, 'lexstatid'],
            lex[idx, 'editid'],
            lex[idx, 'infomap']))
        
lex.output('tsv', filename='lexstat_test')

SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:47:40,249 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:47:40,251 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:47:40,253 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:47:40,259 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:47:40,264 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:47:40,274 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:47:40,277 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:47:40,285 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:47:40,336 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:47:40,339 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:47:40,345 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:47:40,346 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:47:40,347 [INFO] Analyzing words for concept <ARACUÃ>.
2022-03-15 10:47:40,349 [INFO] Analyzing

2022-03-15 10:47:40,791 [INFO] Analyzing words for concept <DEER>.
2022-03-15 10:47:40,794 [INFO] Analyzing words for concept <DIE>.
2022-03-15 10:47:40,796 [INFO] Analyzing words for concept <DIG>.
2022-03-15 10:47:40,798 [INFO] Analyzing words for concept <DIRTY>.
2022-03-15 10:47:40,803 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:47:40,804 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:47:40,809 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:47:40,812 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:47:40,818 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:47:40,826 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:47:40,830 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:47:40,831 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:47:40,843 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:47:40,844 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:47:40,845 [INFO] Analyzing words for conc

2022-03-15 10:47:41,324 [INFO] Analyzing words for concept <INDIGENOUS PEOPLES OF THE AMERICAS>.
2022-03-15 10:47:41,327 [INFO] Analyzing words for concept <INGA>.
2022-03-15 10:47:41,328 [INFO] Analyzing words for concept <INSECT>.
2022-03-15 10:47:41,329 [INFO] Analyzing words for concept <INSIDE>.
SEQUENCE CLUSTERING:  41%|████      | 227/556 [00:01<00:01, 225.40it/s]2022-03-15 10:47:41,341 [INFO] Analyzing words for concept <ISLAND>.
2022-03-15 10:47:41,342 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:47:41,345 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:47:41,353 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:47:41,355 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:47:41,358 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:47:41,360 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:47:41,364 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:47:41,367 [INFO] Analyzing words for concept <

2022-03-15 10:47:41,894 [INFO] Analyzing words for concept <PENIS>.
2022-03-15 10:47:41,895 [INFO] Analyzing words for concept <PEPPER>.
2022-03-15 10:47:41,897 [INFO] Analyzing words for concept <PESTLE>.
2022-03-15 10:47:41,898 [INFO] Analyzing words for concept <PIAU>.
2022-03-15 10:47:41,901 [INFO] Analyzing words for concept <PIECE>.
2022-03-15 10:47:41,907 [INFO] Analyzing words for concept <PIED-CRESTED OROPENDOLA>.
2022-03-15 10:47:41,909 [INFO] Analyzing words for concept <PIERCE>.
2022-03-15 10:47:41,922 [INFO] Analyzing words for concept <PINEAPPLE>.
SEQUENCE CLUSTERING:  62%|██████▏   | 342/556 [00:01<00:01, 205.12it/s]2022-03-15 10:47:41,928 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:47:41,929 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:47:41,931 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:47:41,932 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:47:41,933 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15

2022-03-15 10:47:42,312 [INFO] Analyzing words for concept <STONE>.
2022-03-15 10:47:42,316 [INFO] Analyzing words for concept <STORY>.
2022-03-15 10:47:42,317 [INFO] Analyzing words for concept <STRAIGHTEN>.
2022-03-15 10:47:42,318 [INFO] Analyzing words for concept <STRETCH OUT LEGS>.
2022-03-15 10:47:42,321 [INFO] Analyzing words for concept <STRIKE OR BEAT>.
2022-03-15 10:47:42,377 [INFO] Analyzing words for concept <STRONG TASTE>.
2022-03-15 10:47:42,378 [INFO] Analyzing words for concept <SUCK>.
2022-03-15 10:47:42,382 [INFO] Analyzing words for concept <SUN>.
SEQUENCE CLUSTERING:  82%|████████▏ | 455/556 [00:02<00:00, 220.63it/s]2022-03-15 10:47:42,389 [INFO] Analyzing words for concept <SWEAT>.
2022-03-15 10:47:42,391 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
2022-03-15 10:47:42,396 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:47:42,400 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:47:42,402 [INFO] Analyzing words for concept <SWIM>

Column <lexstatid> already exists, do you want to override? [y/N] y


SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:47:44,958 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:47:44,960 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:47:44,966 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:47:44,973 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:47:44,979 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:47:44,992 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:47:44,997 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:47:45,007 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:47:45,054 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:47:45,056 [INFO] Analyzing words for concept <ANTEATER>.
SEQUENCE CLUSTERING:   2%|▏         | 10/556 [00:00<00:05, 96.15it/s]2022-03-15 10:47:45,064 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:47:45,067 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:47:45,069 [INFO] Analyzing

2022-03-15 10:47:45,537 [INFO] Analyzing words for concept <DEER>.
2022-03-15 10:47:45,543 [INFO] Analyzing words for concept <DIE>.
2022-03-15 10:47:45,544 [INFO] Analyzing words for concept <DIG>.
2022-03-15 10:47:45,547 [INFO] Analyzing words for concept <DIRTY>.
2022-03-15 10:47:45,551 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:47:45,556 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:47:45,560 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:47:45,564 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:47:45,570 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:47:45,574 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:47:45,577 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:47:45,579 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:47:45,588 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:47:45,593 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:47:45,596 [INFO] Analyzing words for conc

2022-03-15 10:47:46,265 [INFO] Analyzing words for concept <IN FRONT OF>.
SEQUENCE CLUSTERING:  40%|████      | 223/556 [00:01<00:01, 173.07it/s]2022-03-15 10:47:46,269 [INFO] Analyzing words for concept <INDIGENOUS PEOPLES OF THE AMERICAS>.
2022-03-15 10:47:46,274 [INFO] Analyzing words for concept <INGA>.
2022-03-15 10:47:46,275 [INFO] Analyzing words for concept <INSECT>.
2022-03-15 10:47:46,277 [INFO] Analyzing words for concept <INSIDE>.
2022-03-15 10:47:46,293 [INFO] Analyzing words for concept <ISLAND>.
2022-03-15 10:47:46,296 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:47:46,309 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:47:46,317 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:47:46,319 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:47:46,329 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:47:46,334 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:47:46,339 [INFO] Analyzing words for concep

2022-03-15 10:47:46,916 [INFO] Analyzing words for concept <PECCARY>.
2022-03-15 10:47:46,918 [INFO] Analyzing words for concept <PEEL>.
2022-03-15 10:47:46,922 [INFO] Analyzing words for concept <PENIS>.
2022-03-15 10:47:46,924 [INFO] Analyzing words for concept <PEPPER>.
2022-03-15 10:47:46,928 [INFO] Analyzing words for concept <PESTLE>.
SEQUENCE CLUSTERING:  61%|██████    | 337/556 [00:01<00:01, 181.52it/s]2022-03-15 10:47:46,931 [INFO] Analyzing words for concept <PIAU>.
2022-03-15 10:47:46,933 [INFO] Analyzing words for concept <PIECE>.
2022-03-15 10:47:46,938 [INFO] Analyzing words for concept <PIED-CRESTED OROPENDOLA>.
2022-03-15 10:47:46,942 [INFO] Analyzing words for concept <PIERCE>.
2022-03-15 10:47:46,950 [INFO] Analyzing words for concept <PINEAPPLE>.
2022-03-15 10:47:46,961 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:47:46,963 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:47:46,965 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-

2022-03-15 10:47:47,420 [INFO] Analyzing words for concept <STING>.
2022-03-15 10:47:47,424 [INFO] Analyzing words for concept <STINGRAY>.
2022-03-15 10:47:47,430 [INFO] Analyzing words for concept <STIR>.
2022-03-15 10:47:47,432 [INFO] Analyzing words for concept <STONE>.
2022-03-15 10:47:47,438 [INFO] Analyzing words for concept <STORY>.
2022-03-15 10:47:47,444 [INFO] Analyzing words for concept <STRAIGHTEN>.
2022-03-15 10:47:47,446 [INFO] Analyzing words for concept <STRETCH OUT LEGS>.
2022-03-15 10:47:47,448 [INFO] Analyzing words for concept <STRIKE OR BEAT>.
2022-03-15 10:47:47,503 [INFO] Analyzing words for concept <STRONG TASTE>.
SEQUENCE CLUSTERING:  81%|████████▏ | 453/556 [00:02<00:00, 186.19it/s]2022-03-15 10:47:47,512 [INFO] Analyzing words for concept <SUCK>.
2022-03-15 10:47:47,526 [INFO] Analyzing words for concept <SUN>.
2022-03-15 10:47:47,533 [INFO] Analyzing words for concept <SWEAT>.
2022-03-15 10:47:47,541 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
20

                                                                       

Column <infomap> already exists, do you want to override? [y/N] y


2022-03-15 10:47:51,311 [INFO] Data has been written to file <lexstat_test.tsv>.


Arawa                	 mahí 	1594 	 1292 	 1377 	 1411	 816
Banawa               	 mai 	1595 	 1292 	 1377 	 1411	 816
Deni                 	 mahi 	1594 	 1292 	 1377 	 1411	 816
Jamamadi             	 mahi 	1594 	 1292 	 1377 	 1411	 816
Jarawara             	 bahi 	1598 	 1292 	 1377 	 1411	 816
Kulina               	 mahi 	1594 	 1292 	 1377 	 1411	 816
Paumari              	 mahi 	1594 	 1292 	 1377 	 1411	 816
Sorowaha             	 masici 	1601 	 1292 	 1384 	 1411	 817


In [86]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "cogid", "scaid")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9587 *
* Recall:        0.9454 *
* F-Scores:      0.9520 *
*************************'


(0.9587404831844618, 0.9453824255454691, 0.952014598748905)

In [87]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "cogid", "lexstatid")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9758 *
* Recall:        0.9367 *
* F-Scores:      0.9559 *
*************************'


(0.9757998974237583, 0.9367186255773216, 0.9558599592739822)

In [88]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "cogid", "infomap")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9725 *
* Recall:        0.9456 *
* F-Scores:      0.9589 *
*************************'


(0.9725126225126224, 0.9456123586558369, 0.9588738625744104)

In [89]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "cogid", "editid")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9716 *
* Recall:        0.8911 *
* F-Scores:      0.9296 *
*************************'


(0.971572960867515, 0.891085363911451, 0.9295901818213397)

In [90]:
#from lingpy.evaluate.acd import bcubes, diff
#wl = Wordlist('lexstat_test.tsv')

#for res in ['turchinid', 'scaid', 'lexstatid', 'infomap','editid']:
#    print('{0:10}\t{1[0]:.2f}\t{1[1]:.2f}\t{1[2]:.2f}\t{1[3]:.2f}\t{1[4]:.2f}'.format(res,bcubes(wl, 'cogid', res, pprint=False)))

In [91]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "turchinid", "infomap")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.8507 *
* Recall:        0.9871 *
* F-Scores:      0.9138 *
*************************'


(0.8507256707256695, 0.987069970845481, 0.9138401942043988)

In [92]:
lex = LexStat(('lexstat_test.tsv'), check=True)
print(lex)

2022-03-15 10:48:22,216 [INFO] No obvious errors found in the data.


<lexstat-model lexstat_test.tsv>


In [93]:
lex.coverage()
#values = number of concepts

{'Arawa': 36,
 'Banawa': 261,
 'Deni': 334,
 'Jamamadi': 260,
 'Jarawara': 358,
 'Kulina': 329,
 'Paumari': 234,
 'Sorowaha': 334}

In [94]:
scorer = lex.get_scorer()


CORRESPONDENCE CALCULATION:   0%|          | 0/32.0 [00:00<?, ?it/s]2022-03-15 10:48:25,828 [INFO] Calculating alignments for pair Arawa / Arawa.
2022-03-15 10:48:25,837 [INFO] Calculating alignments for pair Arawa / Banawa.
2022-03-15 10:48:25,843 [INFO] Calculating alignments for pair Arawa / Deni.
2022-03-15 10:48:25,851 [INFO] Calculating alignments for pair Arawa / Jamamadi.
2022-03-15 10:48:25,859 [INFO] Calculating alignments for pair Arawa / Jarawara.
2022-03-15 10:48:25,867 [INFO] Calculating alignments for pair Arawa / Kulina.
2022-03-15 10:48:25,877 [INFO] Calculating alignments for pair Arawa / Paumari.
2022-03-15 10:48:25,886 [INFO] Calculating alignments for pair Arawa / Sorowaha.
2022-03-15 10:48:25,899 [INFO] Calculating alignments for pair Banawa / Banawa.
CORRESPONDENCE CALCULATION:  31%|███▏      | 10/32.0 [00:00<00:00, 74.57it/s]2022-03-15 10:48:25,963 [INFO] Calculating alignments for pair Banawa / Deni.
2022-03-15 10:48:26,030 [INFO] Calculating alignments for pai

RANDOM CORRESPONDENCE CALCULATION:  91%|█████████ | 29/32.0 [00:05<00:00,  5.08it/s]2022-03-15 10:48:33,509 [INFO] Calculating random alignmentsfor pair Jarawara/Paumari.
RANDOM CORRESPONDENCE CALCULATION:  94%|█████████▍| 30/32.0 [00:05<00:00,  5.08it/s]2022-03-15 10:48:33,705 [INFO] Calculating random alignmentsfor pair Jarawara/Sorowaha.
RANDOM CORRESPONDENCE CALCULATION:  97%|█████████▋| 31/32.0 [00:06<00:00,  5.00it/s]2022-03-15 10:48:33,912 [INFO] Calculating random alignmentsfor pair Kulina/Kulina.
RANDOM CORRESPONDENCE CALCULATION: 100%|██████████| 32/32.0 [00:06<00:00,  4.90it/s]2022-03-15 10:48:34,127 [INFO] Calculating random alignmentsfor pair Kulina/Paumari.
RANDOM CORRESPONDENCE CALCULATION: 33it [00:06,  4.97it/s]                          2022-03-15 10:48:34,322 [INFO] Calculating random alignmentsfor pair Kulina/Sorowaha.
RANDOM CORRESPONDENCE CALCULATION: 34it [00:06,  4.78it/s]2022-03-15 10:48:34,549 [INFO] Calculating random alignmentsfor pair Paumari/Paumari.
RANDOM

In [95]:
lex.cluster(method="sca", threshold=0.45, ref="cognates")

SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:48:37,942 [INFO] Analyzing words for concept <ABIU>.
2022-03-15 10:48:37,947 [INFO] Analyzing words for concept <ABOVE>.
2022-03-15 10:48:37,949 [INFO] Analyzing words for concept <ACAI PALM>.
2022-03-15 10:48:37,954 [INFO] Analyzing words for concept <AGOUTI>.
2022-03-15 10:48:37,961 [INFO] Analyzing words for concept <ALL>.
2022-03-15 10:48:37,966 [INFO] Analyzing words for concept <ALLIGATOR>.
2022-03-15 10:48:37,969 [INFO] Analyzing words for concept <ANIMAL>.
2022-03-15 10:48:37,975 [INFO] Analyzing words for concept <ANT>.
2022-03-15 10:48:38,007 [INFO] Analyzing words for concept <ANT (SPECIES)>.
2022-03-15 10:48:38,008 [INFO] Analyzing words for concept <ANTEATER>.
2022-03-15 10:48:38,017 [INFO] Analyzing words for concept <ANUS>.
2022-03-15 10:48:38,024 [INFO] Analyzing words for concept <APUNÃ>.
2022-03-15 10:48:38,028 [INFO] Analyzing words for concept <ARACUÃ>.
2022-03-15 10:48:38,029 [INFO] Analyzing

2022-03-15 10:48:38,476 [INFO] Analyzing words for concept <DIG>.
2022-03-15 10:48:38,477 [INFO] Analyzing words for concept <DIRTY>.
2022-03-15 10:48:38,480 [INFO] Analyzing words for concept <DISAPPEAR>.
2022-03-15 10:48:38,482 [INFO] Analyzing words for concept <DOG>.
2022-03-15 10:48:38,487 [INFO] Analyzing words for concept <DOOR>.
2022-03-15 10:48:38,490 [INFO] Analyzing words for concept <DOVE>.
2022-03-15 10:48:38,493 [INFO] Analyzing words for concept <DREAM>.
2022-03-15 10:48:38,497 [INFO] Analyzing words for concept <DRINK>.
2022-03-15 10:48:38,500 [INFO] Analyzing words for concept <DROWN>.
2022-03-15 10:48:38,502 [INFO] Analyzing words for concept <DRY>.
2022-03-15 10:48:38,510 [INFO] Analyzing words for concept <DRY UP>.
2022-03-15 10:48:38,511 [INFO] Analyzing words for concept <DULL>.
2022-03-15 10:48:38,511 [INFO] Analyzing words for concept <DUST>.
2022-03-15 10:48:38,513 [INFO] Analyzing words for concept <EAGLE OR HAWK>.
2022-03-15 10:48:38,532 [INFO] Analyzing word

2022-03-15 10:48:38,916 [INFO] Analyzing words for concept <INSECT>.
2022-03-15 10:48:38,919 [INFO] Analyzing words for concept <INSIDE>.
2022-03-15 10:48:38,927 [INFO] Analyzing words for concept <ISLAND>.
2022-03-15 10:48:38,928 [INFO] Analyzing words for concept <JACKFRUIT>.
2022-03-15 10:48:38,930 [INFO] Analyzing words for concept <JAGUAR>.
2022-03-15 10:48:38,936 [INFO] Analyzing words for concept <JUMP>.
2022-03-15 10:48:38,938 [INFO] Analyzing words for concept <KAPOK CEIBE TREE>.
2022-03-15 10:48:38,941 [INFO] Analyzing words for concept <KILL>.
2022-03-15 10:48:38,946 [INFO] Analyzing words for concept <KINGFISHER>.
2022-03-15 10:48:38,948 [INFO] Analyzing words for concept <KINKAJOU>.
2022-03-15 10:48:38,951 [INFO] Analyzing words for concept <KNEE>.
2022-03-15 10:48:38,958 [INFO] Analyzing words for concept <KNOW>.
2022-03-15 10:48:38,961 [INFO] Analyzing words for concept <LAKE>.
2022-03-15 10:48:38,965 [INFO] Analyzing words for concept <LARGE CATFISH>.
2022-03-15 10:48:3

2022-03-15 10:48:39,426 [INFO] Analyzing words for concept <PIAU>.
2022-03-15 10:48:39,427 [INFO] Analyzing words for concept <PIECE>.
2022-03-15 10:48:39,430 [INFO] Analyzing words for concept <PIED-CRESTED OROPENDOLA>.
2022-03-15 10:48:39,431 [INFO] Analyzing words for concept <PIERCE>.
2022-03-15 10:48:39,441 [INFO] Analyzing words for concept <PINEAPPLE>.
2022-03-15 10:48:39,444 [INFO] Analyzing words for concept <PIRANHA>.
2022-03-15 10:48:39,446 [INFO] Analyzing words for concept <PISS>.
2022-03-15 10:48:39,447 [INFO] Analyzing words for concept <PLACE (POSITION)>.
2022-03-15 10:48:39,449 [INFO] Analyzing words for concept <PLAY>.
2022-03-15 10:48:39,450 [INFO] Analyzing words for concept <PLUCK>.
2022-03-15 10:48:39,452 [INFO] Analyzing words for concept <POISON>.
2022-03-15 10:48:39,455 [INFO] Analyzing words for concept <POLISH (SOMETHING)>.
2022-03-15 10:48:39,458 [INFO] Analyzing words for concept <PORCUPINE>.
2022-03-15 10:48:39,462 [INFO] Analyzing words for concept <POT>.

2022-03-15 10:48:39,847 [INFO] Analyzing words for concept <STRETCH OUT LEGS>.
2022-03-15 10:48:39,849 [INFO] Analyzing words for concept <STRIKE OR BEAT>.
2022-03-15 10:48:39,882 [INFO] Analyzing words for concept <STRONG TASTE>.
2022-03-15 10:48:39,883 [INFO] Analyzing words for concept <SUCK>.
2022-03-15 10:48:39,886 [INFO] Analyzing words for concept <SUN>.
2022-03-15 10:48:39,892 [INFO] Analyzing words for concept <SWEAT>.
2022-03-15 10:48:39,894 [INFO] Analyzing words for concept <SWEAT (SUBSTANCE)>.
2022-03-15 10:48:39,900 [INFO] Analyzing words for concept <SWEET MANIOC>.
2022-03-15 10:48:39,902 [INFO] Analyzing words for concept <SWELL>.
2022-03-15 10:48:39,908 [INFO] Analyzing words for concept <SWIM>.
2022-03-15 10:48:39,909 [INFO] Analyzing words for concept <TAIL>.
2022-03-15 10:48:39,913 [INFO] Analyzing words for concept <TAKE>.
2022-03-15 10:48:39,915 [INFO] Analyzing words for concept <TAPIR>.
2022-03-15 10:48:39,919 [INFO] Analyzing words for concept <TASTE (SOMETHING

In [96]:
lex.output('tsv', filename="results", ignore="all", prettify=False)

2022-03-15 10:48:59,837 [INFO] Data has been written to file <results.tsv>.


In [97]:
from lingpy.evaluate.acd import bcubes, diff
bcubes(lex, "cogid", "cognates")

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9587 *
* Recall:        0.9430 *
* F-Scores:      0.9508 *
*************************'


(0.958740483184462, 0.9429984870202267, 0.950804331459516)

In [98]:
alm = Alignments(lex, ref='COGID')
alm.align()

2022-03-15 10:49:02,850 [INFO] 
	7 1 7
	0 7 1 7 0 0
2022-03-15 10:49:02,852 [INFO] 
	0 7 1 7 0 0
	0 0 1 7 1 7
2022-03-15 10:49:02,854 [INFO] 
	[0, 5, 1, 7, 0, 2]
2022-03-15 10:49:02,872 [INFO] 
	7 4 7
	7 4 7 0 0 0 0
2022-03-15 10:49:02,875 [INFO] 
	7 4 7 0 0 0 0
	7 4 7 6 7 0 0
2022-03-15 10:49:02,878 [INFO] 
	7 4 7 2 2 0 0
	0 4 7 0 7 0 0
2022-03-15 10:49:02,882 [INFO] 
	5 4 7 2 4 0 0
	7 4 7 1 7 4 7
2022-03-15 10:49:02,889 [INFO] 
	7 4 7 0 7
	7 4 7 6 7
2022-03-15 10:49:02,899 [INFO] 
	7 3 7 0 0
	7 1 7 4 7
2022-03-15 10:49:02,904 [INFO] 
	7 6 7 1 3 7
	7 6 7 1 0 7
2022-03-15 10:49:02,908 [INFO] 
	7 1 7 4 7
	7 1 7 0 0
2022-03-15 10:49:02,916 [INFO] 
	0 0 7 3 7
	0 0 7 2 7
2022-03-15 10:49:02,919 [INFO] 
	0 0 7 3 7
	1 7 7 3 7
2022-03-15 10:49:02,924 [INFO] 
	0 7 1 7 0
	1 7 1 7 7
2022-03-15 10:49:02,925 [INFO] 
	1 7 1 7 4
	0 7 3 7 0
2022-03-15 10:49:02,928 [INFO] 
	0 7 2 7 2
	0 7 2 7 0
2022-03-15 10:49:02,934 [INFO] 
	1 7 7 0 0
	1 7 7 2 7
2022-03-15 10:49:02,943 [INFO] 
	1 7 1 7 0 0 0 0
	1 7 

2022-03-15 10:49:03,185 [INFO] 
	7 4 4 7 6 7
	7 4 0 7 6 7
2022-03-15 10:49:03,191 [INFO] 
	7 3 7 0 0 0 0 0
	7 2 7 0 0 0 0 0
2022-03-15 10:49:03,193 [INFO] 
	7 3 7 0 0 0 0 0
	7 3 7 9 6 7 1 7
2022-03-15 10:49:03,195 [INFO] 
	[7, 3, 7, 0, 0, 0, 0, 0, 3, 2, 2, 0, 2]
2022-03-15 10:49:03,201 [INFO] 
	6 7 1 7
	6 7 0 7
2022-03-15 10:49:03,204 [INFO] 
	6 7 0 7
	6 7 1 7
2022-03-15 10:49:03,220 [INFO] 
	0 3 7 1 7 0 0
	0 3 7 3 7 4 7
2022-03-15 10:49:03,223 [INFO] 
	0 6 7 3 7 0 0
	0 6 7 3 7 0 0
2022-03-15 10:49:03,228 [INFO] 
	7 3 7 3 7 0 0
	0 3 7 2 7 2 4
2022-03-15 10:49:03,236 [INFO] 
	2 3 7 2 7 1 2
	0 2 7 1 7 0 0
2022-03-15 10:49:03,238 [INFO] 
	0 6 7 3 7 0 0
	0 6 7 0 7 0 0
2022-03-15 10:49:03,240 [INFO] 
	2 3 7 2 7 1 2
	0 6 7 1 7 0 0
2022-03-15 10:49:03,246 [INFO] 
	1 3 7 2 7 1 1
	0 6 7 2 7 0 0
2022-03-15 10:49:03,250 [INFO] 
	1 4 7 2 7 1 1
	0 6 7 3 7 0 0
2022-03-15 10:49:03,257 [INFO] 
	0 0 6 7 6 7
	1 7 6 7 3 7
2022-03-15 10:49:03,266 [INFO] 
	6 7 1 7 5 7
	6 7 1 7 0 0
2022-03-15 10:49:03,269 [

2022-03-15 10:49:03,525 [INFO] 
	3 7 6 7 0 0
	1 7 4 7 1 7
2022-03-15 10:49:03,528 [INFO] 
	2 7 5 7 1 4
	3 7 0 0 1 7
2022-03-15 10:49:03,540 [INFO] 
	2 7 3 7
	1 7 0 0
2022-03-15 10:49:03,544 [INFO] 
	1 7 5 7 5 7
	1 7 0 0 0 0
2022-03-15 10:49:03,553 [INFO] 
	3 7 0 7
	3 7 6 7
2022-03-15 10:49:03,555 [INFO] 
	3 7 3 7
	0 0 6 7
2022-03-15 10:49:03,564 [INFO] 
	3 7 1 7 5 7 7
	3 7 1 7 5 0 7
2022-03-15 10:49:03,566 [INFO] 
	3 7 1 7 5 4 7
	2 7 1 7 5 7 0
2022-03-15 10:49:03,568 [INFO] 
	3 7 1 7 5 5 5
	3 7 1 7 5 7 0
2022-03-15 10:49:03,580 [INFO] 
	1 3 7 5 7
	2 0 7 5 7
2022-03-15 10:49:03,585 [INFO] 
	2 2 7 5 7
	3 0 7 5 7
2022-03-15 10:49:03,588 [INFO] 
	2 1 7 5 7
	3 0 7 5 7
2022-03-15 10:49:03,600 [INFO] 
	0 0 0 0 3 7 5 7 0 0
	1 7 3 7 3 7 5 7 0 0
2022-03-15 10:49:03,602 [INFO] 
	1 4 2 4 3 7 5 7 0 0
	0 0 0 0 2 7 5 7 4 7
2022-03-15 10:49:03,605 [INFO] 
	0 2 1 2 3 7 5 7 1 2
	0 0 0 0 2 7 5 7 0 0
2022-03-15 10:49:03,616 [INFO] 
	1 7 3 7 0 0 0
	1 7 3 7 9 7 5 7
2022-03-15 10:49:03,726 [INFO] 
	[1, 7, 3,

ValueError: [!] your sequence contains only unknown characters

In [99]:
alm.output('html', filename="KSL")

2022-03-15 10:49:06,466 [INFO] Data has been written to file </var/folders/95/ts64rjqd6y15_bn9rt_7dcv40000gn/T/tmptr6xb685.alm>.
2022-03-15 10:49:06,591 [INFO] Data has been written to file <KSL.html>.


In [100]:
lex.calculate('tree', ref='cognates')
print(lex.tree.asciiArt())

2022-03-15 10:49:07,275 [INFO] Successfully calculated tree.


          /-Sorowaha
         |
         |                              /-Banawa
         |                    /edge.1--|
-root----|                   |         |          /-Jamamadi
         |                   |          \edge.0--|
         |          /edge.3--|                    \-Jarawara
         |         |         |
         |         |         |          /-Deni
          \edge.5--|          \edge.2--|
                   |                    \-Kulina
                   |
                   |          /-Arawa
                    \edge.4--|
                              \-Paumari


In [101]:
lex.calculate('dst', ref='cognates')
print(lex.tree.asciiArt())

2022-03-15 10:49:08,376 [INFO] Successfully calculated dst.


          /-Sorowaha
         |
         |                              /-Banawa
         |                    /edge.1--|
-root----|                   |         |          /-Jamamadi
         |                   |          \edge.0--|
         |          /edge.3--|                    \-Jarawara
         |         |         |
         |         |         |          /-Deni
          \edge.5--|          \edge.2--|
                   |                    \-Kulina
                   |
                   |          /-Arawa
                    \edge.4--|
                              \-Paumari


In [102]:
lex.calculate('cluster', ref='cognates')
print(lex.tree.asciiArt())

2022-03-15 10:49:09,192 [INFO] Successfully calculated cluster.


          /-Sorowaha
         |
         |                              /-Banawa
         |                    /edge.1--|
-root----|                   |         |          /-Jamamadi
         |                   |          \edge.0--|
         |          /edge.3--|                    \-Jarawara
         |         |         |
         |         |         |          /-Deni
          \edge.5--|          \edge.2--|
                   |                    \-Kulina
                   |
                   |          /-Arawa
                    \edge.4--|
                              \-Paumari


In [103]:
lex.calculate('UPGMA', ref='cognates')
print(lex.tree.asciiArt())

2022-03-15 10:49:14,054 [INFO] Successfully calculated UPGMA.


          /-Sorowaha
         |
         |                              /-Banawa
         |                    /edge.1--|
-root----|                   |         |          /-Jamamadi
         |                   |          \edge.0--|
         |          /edge.3--|                    \-Jarawara
         |         |         |
         |         |         |          /-Deni
          \edge.5--|          \edge.2--|
                   |                    \-Kulina
                   |
                   |          /-Arawa
                    \edge.4--|
                              \-Paumari


In [104]:
from lingpy import *
from lingpy.compare.partial import Partial
from lingpy.evaluate.acd import partial_bcubes
from lingpy import basictypes as bt
from collabutils import edictor
from sys import argv

#if "fetch" in argv:
 #   edictor.fetch("tuled", remote_dbase="arawa.sqlite3")

# check cognate identifiers to be correct
# wl = Wordlist("tuled.sqlite3")
wl = Wordlist("arawa.tsv")
for idx, doc, con, tks, cogids in wl.iter_rows("doculect", "concept", "tokens",
        "cogids"):
    new_cogids = [x for x in cogids]
    if len(tks.n) != len(cogids):
        new_cogids = [x for x in cogids][:len(tks.n)]
        print(doc, con, tks, cogids)
        for i in range(len(tks.n)-len(cogids)):
            new_cogids += [0]
        wl[idx, "cogids"] = bt.ints(new_cogids)

# extract subset of languages with more than 200 items
D = {0: ["doculect", "concept", "value", "form", "tokens", "morphemes", "cogid" ,"cogids"]}
languages = [k for k, v in wl.coverage().items() if v >=200]
for idx in wl:
    if wl[idx, "doculect"] in languages:
        D[idx] = [wl[idx, h] for h in D[0]]

part = Partial(D, check=True)
part.partial_cluster(method="sca", ref="scaids", threshold=0.45)
part.get_partial_scorer(runs=10000)
part.partial_cluster(method="lexstat", ref="lexstatids", threshold=0.55)

partial_bcubes(part, "cogids", "scaids")
partial_bcubes(part, "cogids", "lexstatids")
part.output("tsv", filename="partial-sca=lexstat", ignore="all", prettify=False)

2022-03-15 10:50:54,221 [INFO] No obvious errors found in the data.
PARTIAL SEQUENCE CLUSTERING:   0%|          | 0/556 [00:00<?, ?it/s]2022-03-15 10:50:54,567 [INFO] Analyzing concept ABIU...
2022-03-15 10:50:54,570 [INFO] Analyzing concept ABOVE...
2022-03-15 10:50:54,574 [INFO] Analyzing concept ACAI PALM...
2022-03-15 10:50:54,578 [INFO] Analyzing concept AGOUTI...
2022-03-15 10:50:54,585 [INFO] Analyzing concept ALL...
2022-03-15 10:50:54,589 [INFO] Analyzing concept ALLIGATOR...
2022-03-15 10:50:54,599 [INFO] Analyzing concept ANIMAL...
2022-03-15 10:50:54,639 [INFO] Analyzing concept ANT...
2022-03-15 10:50:54,642 [INFO] Analyzing concept ANT (SPECIES)...
2022-03-15 10:50:54,651 [INFO] Analyzing concept ANTEATER...
2022-03-15 10:50:54,654 [INFO] Analyzing concept ANUS...
2022-03-15 10:50:54,655 [INFO] Analyzing concept APUNÃ...
2022-03-15 10:50:54,658 [INFO] Analyzing concept ARACUÃ...
2022-03-15 10:50:54,662 [INFO] Analyzing concept ARAUNÃ...
2022-03-15 10:50:54,664 [INFO] Anal

2022-03-15 10:50:55,191 [INFO] Analyzing concept EAT...
2022-03-15 10:50:55,194 [INFO] Analyzing concept EDGE...
2022-03-15 10:50:55,204 [INFO] Analyzing concept EGG...
2022-03-15 10:50:55,207 [INFO] Analyzing concept ELECTRIC EEL...
2022-03-15 10:50:55,211 [INFO] Analyzing concept EXCHANGE...
2022-03-15 10:50:55,221 [INFO] Analyzing concept EYE...
PARTIAL SEQUENCE CLUSTERING:  24%|██▍       | 135/556 [00:00<00:02, 196.60it/s]2022-03-15 10:50:55,226 [INFO] Analyzing concept EYEBROW...
2022-03-15 10:50:55,237 [INFO] Analyzing concept FAECES (EXCREMENT)...
2022-03-15 10:50:55,249 [INFO] Analyzing concept FALL...
2022-03-15 10:50:55,254 [INFO] Analyzing concept FAN (OBJECT)...
2022-03-15 10:50:55,258 [INFO] Analyzing concept FAR...
2022-03-15 10:50:55,262 [INFO] Analyzing concept FAST...
2022-03-15 10:50:55,271 [INFO] Analyzing concept FASTEN...
2022-03-15 10:50:55,274 [INFO] Analyzing concept FAT (ORGANIC SUBSTANCE)...
2022-03-15 10:50:55,279 [INFO] Analyzing concept FATHER...
2022-03-15

2022-03-15 10:50:55,747 [INFO] Analyzing concept LIVER...
2022-03-15 10:50:55,757 [INFO] Analyzing concept LONG...
2022-03-15 10:50:55,762 [INFO] Analyzing concept LOUSE...
2022-03-15 10:50:55,769 [INFO] Analyzing concept LOW...
2022-03-15 10:50:55,770 [INFO] Analyzing concept LUBRICATE...
2022-03-15 10:50:55,772 [INFO] Analyzing concept LUNG...
2022-03-15 10:50:55,776 [INFO] Analyzing concept LUNG FISH...
2022-03-15 10:50:55,778 [INFO] Analyzing concept MACAW...
2022-03-15 10:50:55,781 [INFO] Analyzing concept MAIZE...
PARTIAL SEQUENCE CLUSTERING:  48%|████▊     | 268/556 [00:01<00:01, 254.08it/s]2022-03-15 10:50:55,788 [INFO] Analyzing concept MAN...
2022-03-15 10:50:55,791 [INFO] Analyzing concept MANDI...
2022-03-15 10:50:55,796 [INFO] Analyzing concept MANY...
2022-03-15 10:50:55,799 [INFO] Analyzing concept MARK...
2022-03-15 10:50:55,802 [INFO] Analyzing concept MARMOSET...
2022-03-15 10:50:55,805 [INFO] Analyzing concept MATERNAL UNCLE (MOTHER'S BROTHER)...
2022-03-15 10:50:55,

2022-03-15 10:50:56,364 [INFO] Analyzing concept SCROTUM...
2022-03-15 10:50:56,366 [INFO] Analyzing concept SEA...
2022-03-15 10:50:56,369 [INFO] Analyzing concept SEAGULL...
2022-03-15 10:50:56,370 [INFO] Analyzing concept SEAT...
2022-03-15 10:50:56,373 [INFO] Analyzing concept SEE...
2022-03-15 10:50:56,375 [INFO] Analyzing concept SEED...
2022-03-15 10:50:56,377 [INFO] Analyzing concept SEW...
2022-03-15 10:50:56,380 [INFO] Analyzing concept SHAKE...
2022-03-15 10:50:56,384 [INFO] Analyzing concept SHAMAN (FOLK HEALER)...
2022-03-15 10:50:56,387 [INFO] Analyzing concept SHARP...
2022-03-15 10:50:56,389 [INFO] Analyzing concept SHARPEN (SOMETHING)...
2022-03-15 10:50:56,391 [INFO] Analyzing concept SHELLFISH...
2022-03-15 10:50:56,393 [INFO] Analyzing concept SHINY...
2022-03-15 10:50:56,394 [INFO] Analyzing concept SHIT (DEFECATE)...
2022-03-15 10:50:56,395 [INFO] Analyzing concept SHORT...
2022-03-15 10:50:56,405 [INFO] Analyzing concept SHOW...
2022-03-15 10:50:56,407 [INFO] Ana

2022-03-15 10:50:56,837 [INFO] Analyzing concept VINE...
2022-03-15 10:50:56,841 [INFO] Analyzing concept VISIBLE...
2022-03-15 10:50:56,843 [INFO] Analyzing concept VOICE...
2022-03-15 10:50:56,845 [INFO] Analyzing concept VOMIT...
2022-03-15 10:50:56,847 [INFO] Analyzing concept VULTURE...
2022-03-15 10:50:56,848 [INFO] Analyzing concept WAIT (FOR)...
2022-03-15 10:50:56,851 [INFO] Analyzing concept WALK BENT OVER...
2022-03-15 10:50:56,852 [INFO] Analyzing concept WARM...
2022-03-15 10:50:56,856 [INFO] Analyzing concept WASH...
2022-03-15 10:50:56,863 [INFO] Analyzing concept WASP...
2022-03-15 10:50:56,870 [INFO] Analyzing concept WATER...
2022-03-15 10:50:56,873 [INFO] Analyzing concept WE...
2022-03-15 10:50:56,874 [INFO] Analyzing concept WE (EXCLUSIVE)...
2022-03-15 10:50:56,875 [INFO] Analyzing concept WE (INCLUSIVE)...
PARTIAL SEQUENCE CLUSTERING:  96%|█████████▌| 533/556 [00:02<00:00, 261.48it/s]2022-03-15 10:50:56,880 [INFO] Analyzing concept WEASEL...
2022-03-15 10:50:56,8

RANDOM CORRESPONDENCE CALCULATION:  61%|██████    | 15/24.5 [00:25<00:16,  1.73s/it]2022-03-15 10:51:24,847 [INFO] Calculating random alignmentsfor pair Jamamadi/Jarawara.
RANDOM CORRESPONDENCE CALCULATION:  65%|██████▌   | 16/24.5 [00:27<00:14,  1.73s/it]2022-03-15 10:51:26,583 [INFO] Calculating random alignmentsfor pair Jamamadi/Kulina.
RANDOM CORRESPONDENCE CALCULATION:  69%|██████▉   | 17/24.5 [00:29<00:13,  1.74s/it]2022-03-15 10:51:28,342 [INFO] Calculating random alignmentsfor pair Jamamadi/Paumari.
RANDOM CORRESPONDENCE CALCULATION:  73%|███████▎  | 18/24.5 [00:31<00:11,  1.79s/it]2022-03-15 10:51:30,233 [INFO] Calculating random alignmentsfor pair Jamamadi/Sorowaha.
RANDOM CORRESPONDENCE CALCULATION:  78%|███████▊  | 19/24.5 [00:32<00:09,  1.76s/it]2022-03-15 10:51:31,935 [INFO] Calculating random alignmentsfor pair Jarawara/Jarawara.
RANDOM CORRESPONDENCE CALCULATION:  82%|████████▏ | 20/24.5 [00:34<00:07,  1.74s/it]2022-03-15 10:51:33,623 [INFO] Calculating random alignment

2022-03-15 10:51:50,078 [INFO] Analyzing concept COLD...
2022-03-15 10:51:50,084 [INFO] Analyzing concept COMB...
2022-03-15 10:51:50,088 [INFO] Analyzing concept COME...
2022-03-15 10:51:50,091 [INFO] Analyzing concept COOK (SOMETHING)...
2022-03-15 10:51:50,093 [INFO] Analyzing concept CORPSE...
2022-03-15 10:51:50,094 [INFO] Analyzing concept CORRECT (RIGHT)...
2022-03-15 10:51:50,100 [INFO] Analyzing concept COTTON...
2022-03-15 10:51:50,104 [INFO] Analyzing concept COUGH...
2022-03-15 10:51:50,107 [INFO] Analyzing concept CRAB...
2022-03-15 10:51:50,109 [INFO] Analyzing concept CRICKET...
2022-03-15 10:51:50,114 [INFO] Analyzing concept CRY...
PARTIAL SEQUENCE CLUSTERING:  19%|█▊        | 104/556 [00:00<00:02, 217.89it/s]2022-03-15 10:51:50,120 [INFO] Analyzing concept CURASSOW...
2022-03-15 10:51:50,152 [INFO] Analyzing concept CUT...
2022-03-15 10:51:50,157 [INFO] Analyzing concept DARK...
2022-03-15 10:51:50,161 [INFO] Analyzing concept DAUGHTER...
2022-03-15 10:51:50,166 [INFO

2022-03-15 10:51:50,780 [INFO] Analyzing concept IN FRONT OF...
2022-03-15 10:51:50,788 [INFO] Analyzing concept INDIGENOUS PEOPLES OF THE AMERICAS...
2022-03-15 10:51:50,791 [INFO] Analyzing concept INGA...
2022-03-15 10:51:50,792 [INFO] Analyzing concept INSECT...
2022-03-15 10:51:50,805 [INFO] Analyzing concept INSIDE...
2022-03-15 10:51:50,810 [INFO] Analyzing concept ISLAND...
2022-03-15 10:51:50,814 [INFO] Analyzing concept JACKFRUIT...
2022-03-15 10:51:50,822 [INFO] Analyzing concept JAGUAR...
2022-03-15 10:51:50,825 [INFO] Analyzing concept JUMP...
2022-03-15 10:51:50,828 [INFO] Analyzing concept KAPOK CEIBE TREE...
2022-03-15 10:51:50,831 [INFO] Analyzing concept KILL...
2022-03-15 10:51:50,841 [INFO] Analyzing concept KINGFISHER...
2022-03-15 10:51:50,844 [INFO] Analyzing concept KINKAJOU...
2022-03-15 10:51:50,854 [INFO] Analyzing concept KNEE...
PARTIAL SEQUENCE CLUSTERING:  43%|████▎     | 237/556 [00:01<00:01, 189.31it/s]2022-03-15 10:51:50,858 [INFO] Analyzing concept KN

2022-03-15 10:51:51,362 [INFO] Analyzing concept POT...
2022-03-15 10:51:51,367 [INFO] Analyzing concept POUND TO FLATTEN...
2022-03-15 10:51:51,372 [INFO] Analyzing concept PRECIOUS...
2022-03-15 10:51:51,375 [INFO] Analyzing concept PREGNANT...
2022-03-15 10:51:51,377 [INFO] Analyzing concept PRETEND...
2022-03-15 10:51:51,378 [INFO] Analyzing concept PROCREATE...
2022-03-15 10:51:51,382 [INFO] Analyzing concept PULL...
PARTIAL SEQUENCE CLUSTERING:  64%|██████▍   | 358/556 [00:01<00:00, 228.59it/s]2022-03-15 10:51:51,386 [INFO] Analyzing concept PULL OFF (SKIN)...
2022-03-15 10:51:51,388 [INFO] Analyzing concept PULL OUT...
2022-03-15 10:51:51,390 [INFO] Analyzing concept PURPLE BANANA...
2022-03-15 10:51:51,395 [INFO] Analyzing concept PUSH...
2022-03-15 10:51:51,399 [INFO] Analyzing concept PUT...
2022-03-15 10:51:51,401 [INFO] Analyzing concept PYTHON...
2022-03-15 10:51:51,405 [INFO] Analyzing concept RAIN (PRECIPITATION)...
2022-03-15 10:51:51,407 [INFO] Analyzing concept RAINBO

2022-03-15 10:51:51,920 [INFO] Analyzing concept THROW WATER...
2022-03-15 10:51:51,924 [INFO] Analyzing concept THUNDER...
2022-03-15 10:51:51,929 [INFO] Analyzing concept TICK...
2022-03-15 10:51:51,938 [INFO] Analyzing concept TIE...
2022-03-15 10:51:51,942 [INFO] Analyzing concept TINGUÍ...
2022-03-15 10:51:51,944 [INFO] Analyzing concept TOAD...
PARTIAL SEQUENCE CLUSTERING:  88%|████████▊ | 487/556 [00:02<00:00, 221.48it/s]2022-03-15 10:51:51,948 [INFO] Analyzing concept TOAST...
2022-03-15 10:51:51,953 [INFO] Analyzing concept TOBACCO...
2022-03-15 10:51:51,955 [INFO] Analyzing concept TOMB...
2022-03-15 10:51:51,958 [INFO] Analyzing concept TONGUE...
2022-03-15 10:51:51,961 [INFO] Analyzing concept TONKA BEAN...
2022-03-15 10:51:51,967 [INFO] Analyzing concept TOOTH...
2022-03-15 10:51:51,978 [INFO] Analyzing concept TORTOISE...
2022-03-15 10:51:51,984 [INFO] Analyzing concept TOUCAN...
2022-03-15 10:51:51,988 [INFO] Analyzing concept TREAD ON...
2022-03-15 10:51:51,994 [INFO] A

*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9128 *
* Recall:        0.8930 *
* F-Scores:      0.9028 *
*************************'
*************************
* B-Cubed-Scores        *
* --------------------- *
* Precision:     0.9631 *
* Recall:        0.8806 *
* F-Scores:      0.9200 *
*************************'
