In this work, we utilized a Word2Vec model to generate vector representations of IPA segments, inspired by the approach described in the paper “IPA Alignment Using Vector Representations” by Pavel Sofroniev and Çağri Çöltekin(https://github.com/pavelsof/ipavec/blob/master/paper/thesis.pdf). The Word2Vec model was trained specifically for generating these vector representations and is based on the methodology provided in their research.

In [1]:
!pip install gensim
!pip install ipatok

Collecting ipatok
  Downloading ipatok-0.4.2-py2.py3-none-any.whl.metadata (6.3 kB)
Downloading ipatok-0.4.2-py2.py3-none-any.whl (15 kB)
Installing collected packages: ipatok
Successfully installed ipatok-0.4.2


In [57]:
!pip install seaborn
!pip install matplotlib



In [61]:
!pip install geopy



In [63]:
import pandas as pd
from gensim.models import Word2Vec
from google.colab import drive
import os
import warnings
from ipatok.ipa import is_letter, is_tie_bar
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from geopy.distance import great_circle

In [3]:
# Mount Google Drive
drive.mount('/content/gdrive')
forms = pd.read_csv("/content/gdrive/My Drive/Data Science/forms.csv")
# Check the data
forms

Mounted at /content/gdrive


Unnamed: 0,ID,Local_ID,Language_ID,Parameter_ID,Value,Form,Segments,Comment,Source,Cognacy,Loan,Graphemes,Profile,Prosody,Morpheme_Glosses,Partial_Cognacy,Chinese_Characters
0,Beijing-91_vomit-1,,Beijing,91_vomit,tʰu⁵¹,tʰu⁵¹,tʰ u ⁵¹,,Liu2007,,,,,i n t,spit/吐,1,吐
1,Haerbin-91_vomit-1,,Haerbin,91_vomit,tʰu⁵³,tʰu⁵³,tʰ u ⁵³,,Liu2007,,,,,i n t,spit/吐,1,吐
2,Jinan-91_vomit-1,,Jinan,91_vomit,tʰu³¹,tʰu³¹,tʰ u ³¹,,Liu2007,,,,,i n t,spit/吐,1,吐
3,Rongcheng-91_vomit-1,,Rongcheng,91_vomit,ou²¹³⁻³⁵ tʰu²¹⁴,ou²¹³⁻³⁵ tʰu²¹⁴,ou ²¹³ + tʰ u ²¹⁴,copulative synonyme,Liu2007,,,,,n t + i n t,nausea/嘔 spit/吐,2 1,嘔 吐
4,Taiyuan-91_vomit-1,,Taiyuan,91_vomit,tʰu⁵³ lə⁰,tʰu⁵³ lə⁰,tʰ u ⁵³ + l ə ⁰,,Liu2007,,,,,i n t + i n t,nausea/嘔 _:PERFECTIVE/了,2 5,嘔 嘞
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4297,Guangzhou-90_woman-1,,Guangzhou,90_woman,nøy²³ iɐn²¹⁻²³,nøy²³ iɐn²¹⁻²³,n øy ²³ + j ɐ n ²¹,,Liu2007,,,,,i n t + i n c t,female/女 _person/人,39 38,女 人
4298,Fuzhou-90_woman-1,,Fuzhou,90_woman,i⁵⁵⁻⁵³ tsia³²,i⁵⁵⁻⁵³ tsia³²,i ⁵⁵ + ts j a ³²,,Liu2007,,,,,n t + i m n t,woman/伊 sister/姐,824 715,伊 姐
4299,Fuzhou-90_woman-2,,Fuzhou,90_woman,ny³²⁻⁵⁵ ɛ²¹²,ny³²⁻⁵⁵ ɛ²¹²,n y ³² + ɛ ²¹²,,Liu2007,,,,,i n t + n t,female/女 _world/界,39 825,女 界
4300,Fuzhou-90_woman-3,,Fuzhou,90_woman,tsy⁵⁵ nøyŋ⁵³⁻⁵⁵ nøyŋ⁵³,tsy⁵⁵ nøyŋ⁵³⁻⁵⁵ nøyŋ⁵³,ts y ⁵⁵ + n øy ŋ ⁵³ + n øy ŋ ⁵³,,Liu2007,,,,,i n t + i n c t + i n c t,woman/諸 female/娘 _person/人,749 31 38,諸 娘 儂


In [16]:
# Training a word2vec model to generate the vector representations of IPA segments
# First, we'll use the traget data conmbined with the annotated chinese dialets data
# as the training data, for the model's only purpose is to generate the embedding for the
# target data, so overfitting is not a problem.


def normalise_token(token):
	return ''.join([char for char in token
					if not is_tie_bar(char) and char != '◌̯'[1]])

def read_from_dir(dataset_path):
  # Initialize a list to store the result
  ipa_data = []
  # Iterate over all the file in the dir
  for file in os.listdir(dataset_path):
    file_path = os.path.join(dataset_path, file)
    # Check if the file is in tsv form
    if os.path.isfile(file_path) and file_path.endswith('.tsv'):
      # Read and process the ipa
      with open(file_path, encoding='utf-8') as f:
        for line in f:
          col = line.strip().split('\t')
          if col[0] == 'language':
            continue
          # The IPA tokens is the 6th column of the tsv table
          tokens = col[5]
          ipa_data.append(normalise_token(tokens).strip())
  return ipa_data

# Read from the target data
form = forms['Form']
tar_data = []
for ipa in form:
    if pd.notna(ipa):
        seg = [normalise_token(ipa).strip()]
        tar_data.append(seg)

dir_data = read_from_dir("/content/gdrive/My Drive/Data Science/data")
ipa_data = tar_data
# Add target data and the extra training data together
ipa_data.extend(dir_data)
ipa_data


[['tʰu⁵¹'],
 ['tʰu⁵³'],
 ['tʰu³¹'],
 ['ou²¹³⁻³⁵ tʰu²¹⁴'],
 ['tʰu⁵³ lə⁰'],
 ['ŋou²¹ tʰu⁵³'],
 ['fa²¹⁻⁵⁵ tʰu⁵³ lo²¹'],
 ['tʰu⁵³⁻⁵⁵ lo²¹'],
 ['ŋəu⁵³⁻⁴⁵ lo²¹'],
 ['tʰu⁴⁴'],
 ['əɯ¹¹⁻²² tʰu⁴⁴⁻²²'],
 ['fɔ⁵⁵⁻⁴⁵ ŋoʔ³²*'],
 ['ʏ⁵²'],
 ['tʰøy⁵¹'],
 ['tʰu⁵⁵'],
 ['xue⁴²'],
 ['ta³⁵⁻⁵ xue⁴²'],
 ['ŋiɛu²¹³'],
 ['tʰu²¹³'],
 ['pʰon³³'],
 ['tʰu²⁵'],
 ['ŋɐu³⁵ tʰou³³'],
 ['tʰou²¹²'],
 ['tʰo²¹'],
 ['pʰᴀ⁵¹'],
 ['pʰa⁵³'],
 ['pʰa³¹'],
 ['pʰa³³⁴'],
 ['pʰa⁴⁵'],
 ['pʰɑ⁴⁴'],
 ['pʰa²¹³'],
 ['tsʰa²¹³'],
 ['pʰɑ⁴⁴'],
 ['pʰo³⁵'],
 ['pʰo⁴¹²'],
 ['ho⁴¹'],
 ['pʰa⁵⁵'],
 ['pʰɔ³⁵'],
 ['pʰa²¹³'],
 ['pʰa⁵³'],
 ['vi⁵³'],
 ['kiaŋ⁴⁴'],
 ['pʰᴀ²⁵'],
 ['pʰa³³'],
 ['kiaŋ⁵⁵'],
 ['kiã⁵⁵'],
 ['pʰi³⁵ fu⁰'],
 ['ʐou⁵³ pʰiər²⁴'],
 ['pʰi⁵³ fu²¹⁴'],
 ['pʰi³⁵ fu⁵²'],
 ['pʰi²¹ fu²¹'],
 ['pʰi²⁴'],
 ['pʰi²¹⁻⁵⁵ pʰi²¹⁻⁵⁵'],
 ['pʰi²¹⁻⁵⁵ tsɿ⁵³'],
 ['pʰi¹³ fu³¹'],
 ['pʰɿ⁴⁴'],
 ['bi¹³⁻²² fu⁴⁴⁻³¹'],
 ['bei³⁴¹⁻³³ fu⁴⁴⁻²²'],
 ['pi¹³ fu³³'],
 ['pʰi¹³'],
 ['pʰi²⁴ fu⁰'],
 ['pʰi¹¹'],
 ['pi²² fu⁴²⁻²²'],
 ['pʰei²¹ fu⁵³'],
 ['pʰuoi⁵³⁻⁵⁵ u⁵⁵'],
 ['pʰuoi⁵³'],
 ['pʰe

In [37]:
# Train the model
model = Word2Vec(
				sentences=ipa_data,
				vector_size=15,  # the length of the output vectors
				window=1,  # that many to the left and that many to the right
				seed=42,  # random seed
				workers=1,  # needed for reproducibility
				min_count=1,  # ignore tokens occurring less often than that
				sg=1,  # 0 for cbow, 1 for skip-gram
				negative=1,  # number of negative samples (per positive one?)
				epochs=20,  # number of epochs
				null_word=True)  # reached by ['\0']

In [39]:
# Return the vector representation of the IPA, if it's in the set of  word2vec word vectors.
def get_vector(token):
  # Normalize and strip the token
	token = normalise_token(token).strip()
  # Check if the token exists in the model's vocabulary
	if token in model.wv:
		return token
  # If the token is an empty string, return a zero vector
	if token == '':
		return '\0'
  # Create an alternative token by removing non-letter characters
	alt_token = ''.join([char for char in token if is_letter(char, False)])
  # Check if the alternative token exists in the model's vocabulary
	if alt_token in model.wv:
		return alt_token
  # Warn that the token cannot be recognized by the model
	warnings.warn('phon2vec: cannot recognise {}'.format(token))
	return '\0'

# Calculate the similiraty between two IPA segments
def calc_sim(ipa_a, ipa_b):
  # Retrieve vectors for both IPA segments
  vec_a = get_vector(ipa_a)
  vec_b = get_vector(ipa_b)
  # If either vector is a zero vector, return 0 as similarity
  if vec_a == "\0" or vec_b == "\0":
    return 0
  # Normalize similarity to the range [0, 1]
  normalized_sim = (model.wv.similarity(vec_a, vec_b) + 1) / 2
  return normalized_sim

test = calc_sim('tʰu⁵¹', 'tʰu⁵³')# fa²¹⁻⁵⁵ tʰu⁵³ lo²¹
test


0.3846202865242958

Now we measure the pronunciation similarity of synonymous dialects between each city based on the trained model.

In [40]:
# Initial an empty dict
city_ipa_dict = {}
# Iterate over thecsv table
for index, row in forms.iterrows():
    parameter_id = row['Parameter_ID']
    language_id = row['Language_ID']
    ipa = row['Form']

    # If parameter_id not in the dict
    if parameter_id not in city_ipa_dict:
        city_ipa_dict[parameter_id] = {}

    # If the city is already in the parameter_id dict
    if language_id in city_ipa_dict[parameter_id]:
        city_ipa_dict[parameter_id][language_id].append(ipa.strip())
    # If the city is not in the parameter_id dict
    else:
        city_ipa_dict[parameter_id][language_id] = [ipa.strip()]
city_ipa_dict


{'91_vomit': {'Beijing': ['tʰu⁵¹'],
  'Haerbin': ['tʰu⁵³'],
  'Jinan': ['tʰu³¹'],
  'Rongcheng': ['ou²¹³⁻³⁵ tʰu²¹⁴'],
  'Taiyuan': ['tʰu⁵³ lə⁰'],
  'XiAn': ['ŋou²¹ tʰu⁵³'],
  'Chengdu': ['fa²¹⁻⁵⁵ tʰu⁵³ lo²¹', 'tʰu⁵³⁻⁵⁵ lo²¹', 'ŋəu⁵³⁻⁴⁵ lo²¹'],
  'Nanjing': ['tʰu⁴⁴', 'əɯ¹¹⁻²² tʰu⁴⁴⁻²²'],
  'Jixi': ['fɔ⁵⁵⁻⁴⁵ ŋoʔ³²*'],
  'Suzhou': ['ʏ⁵²'],
  'Wenzhou': ['tʰøy⁵¹'],
  'Changsha': ['tʰu⁵⁵'],
  'Loudi': ['xue⁴²', 'ta³⁵⁻⁵ xue⁴²'],
  'Nanchang': ['ŋiɛu²¹³', 'tʰu²¹³'],
  'Meixian': ['pʰon³³'],
  'Guilin': ['tʰu²⁵'],
  'Guangzhou': ['ŋɐu³⁵ tʰou³³'],
  'Fuzhou': ['tʰou²¹²'],
  'Xiamen': ['tʰo²¹']},
 '92_fear': {'Beijing': ['pʰᴀ⁵¹'],
  'Haerbin': ['pʰa⁵³'],
  'Jinan': ['pʰa³¹'],
  'Rongcheng': ['pʰa³³⁴'],
  'Taiyuan': ['pʰa⁴⁵'],
  'XiAn': ['pʰɑ⁴⁴'],
  'Chengdu': ['pʰa²¹³', 'tsʰa²¹³'],
  'Nanjing': ['pʰɑ⁴⁴'],
  'Jixi': ['pʰo³⁵'],
  'Suzhou': ['pʰo⁴¹²'],
  'Wenzhou': ['ho⁴¹'],
  'Changsha': ['pʰa⁵⁵'],
  'Loudi': ['pʰɔ³⁵'],
  'Nanchang': ['pʰa²¹³'],
  'Meixian': ['pʰa⁵³', 'vi⁵³', 'kiaŋ⁴⁴'],
  'Guilin'

In [49]:
cities = forms['Language_ID'].unique()
# a method that calculate the similarites of a parameter_id between all the cities
def sim_bet_cities(a_parameter_id_dict, city_names, para_id):
  # Create a matrix to store the results
  similarity_matrix = pd.DataFrame(index=city_names, columns=city_names)
  # Iterate to compare all the cities with each other
  for a in city_names:
    for b in city_names:
      # Set the similarity to one if a city is compared with itself
      if a == b:
        similarity_matrix.loc[a, b] = 1.0
      else:
        # In case there are cities that do not have the dialects of some particular meanings
        ipa_list_a = a_parameter_id_dict.get(a, [])
        ipa_list_b = a_parameter_id_dict.get(b, [])
        sims = []
        # Calculate the similarities of all the dialects city a has with all the dialects city b has
        for ipa_a in ipa_list_a:
            for ipa_b in ipa_list_b:
                sim = calc_sim(ipa_a, ipa_b)
                sims.append(sim)
        # If there is a city has no dialect of a word, set the similarity to zero
        if not sims:
          similarity_matrix.loc[a, b] = 0.0
        else:
          # Use Average similarity
          similarity_matrix.loc[a, b] = sum(sims) / len(sims)
        similarity_matrix.name = para_id
  return similarity_matrix
test_sim_matrix = sim_bet_cities(city_ipa_dict['91_vomit'], cities, '91_vomit')
test_sim_matrix

Unnamed: 0,Beijing,Haerbin,Jinan,Rongcheng,Taiyuan,XiAn,Chengdu,Nanjing,Jixi,Suzhou,Wenzhou,Changsha,Loudi,Nanchang,Meixian,Guilin,Guangzhou,Fuzhou,Xiamen
Beijing,1.0,0.38462,0.629438,0.684913,0.445835,0.526848,0.394144,0.383133,0.464549,0.548611,0.470122,0.467213,0.361977,0.470131,0.621924,0.507572,0.418081,0.387073,0.457001
Haerbin,0.38462,1.0,0.329955,0.186266,0.371532,0.708985,0.60644,0.378942,0.45997,0.522533,0.719878,0.413929,0.660586,0.446382,0.534304,0.421892,0.270857,0.629743,0.369045
Jinan,0.629438,0.329955,1.0,0.618585,0.390385,0.317872,0.344136,0.648778,0.513692,0.160137,0.664801,0.345083,0.422641,0.463205,0.528798,0.59346,0.617822,0.484287,0.654856
Rongcheng,0.684913,0.186266,0.618585,1.0,0.601123,0.314238,0.493852,0.597198,0.618121,0.401184,0.244069,0.588402,0.336602,0.50538,0.577913,0.470764,0.453846,0.576117,0.435307
Taiyuan,0.445835,0.371532,0.390385,0.601123,1.0,0.530945,0.500791,0.41336,0.387507,0.597086,0.318747,0.528428,0.41841,0.378108,0.201906,0.597404,0.630228,0.49916,0.458724
XiAn,0.526848,0.708985,0.317872,0.314238,0.530945,1.0,0.554408,0.42698,0.305869,0.698159,0.570454,0.48192,0.539146,0.421692,0.51016,0.459963,0.454299,0.381084,0.498428
Chengdu,0.394144,0.60644,0.344136,0.493852,0.500791,0.554408,1.0,0.496204,0.519496,0.538596,0.358258,0.455998,0.538497,0.629581,0.499243,0.562327,0.367229,0.542552,0.319006
Nanjing,0.383133,0.378942,0.648778,0.597198,0.41336,0.42698,0.496204,1.0,0.433627,0.390091,0.5411,0.429139,0.523933,0.57777,0.516419,0.511719,0.599598,0.655127,0.604103
Jixi,0.464549,0.45997,0.513692,0.618121,0.387507,0.305869,0.519496,0.433627,1.0,0.370272,0.428658,0.640819,0.414177,0.492483,0.554364,0.409319,0.237188,0.484792,0.429457
Suzhou,0.548611,0.522533,0.160137,0.401184,0.597086,0.698159,0.538596,0.390091,0.370272,1.0,0.275704,0.62308,0.530157,0.49594,0.50096,0.375886,0.482294,0.414453,0.453139


In [60]:
# Combine all dataframes into one big dataframe
para_ids = forms['Parameter_ID'].unique()
sim_dfs = []
for id in para_ids:
  sim_dfs.append(sim_bet_cities(city_ipa_dict[id], cities, id))
combined_sim_dfs = pd.concat(sim_dfs, keys=[df.name for df in sim_dfs], names=["Parameter_ID","Cities"])
combined_sim_dfs = combined_sim_dfs.reset_index()
combined_sim_dfs

Unnamed: 0,Parameter_ID,Cities,Beijing,Haerbin,Jinan,Rongcheng,Taiyuan,XiAn,Chengdu,Nanjing,...,Suzhou,Wenzhou,Changsha,Loudi,Nanchang,Meixian,Guilin,Guangzhou,Fuzhou,Xiamen
0,91_vomit,Beijing,1.0,0.38462,0.629438,0.684913,0.445835,0.526848,0.394144,0.383133,...,0.548611,0.470122,0.467213,0.361977,0.470131,0.621924,0.507572,0.418081,0.387073,0.457001
1,91_vomit,Haerbin,0.38462,1.0,0.329955,0.186266,0.371532,0.708985,0.60644,0.378942,...,0.522533,0.719878,0.413929,0.660586,0.446382,0.534304,0.421892,0.270857,0.629743,0.369045
2,91_vomit,Jinan,0.629438,0.329955,1.0,0.618585,0.390385,0.317872,0.344136,0.648778,...,0.160137,0.664801,0.345083,0.422641,0.463205,0.528798,0.59346,0.617822,0.484287,0.654856
3,91_vomit,Rongcheng,0.684913,0.186266,0.618585,1.0,0.601123,0.314238,0.493852,0.597198,...,0.401184,0.244069,0.588402,0.336602,0.50538,0.577913,0.470764,0.453846,0.576117,0.435307
4,91_vomit,Taiyuan,0.445835,0.371532,0.390385,0.601123,1.0,0.530945,0.500791,0.41336,...,0.597086,0.318747,0.528428,0.41841,0.378108,0.201906,0.597404,0.630228,0.49916,0.458724
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3852,90_woman,Meixian,0.722204,0.446065,0.534086,0.567421,0.456802,0.45432,0.536772,0.581148,...,0.343791,0.537171,0.457638,0.703294,0.63198,1.0,0.444716,0.419925,0.547321,0.547851
3853,90_woman,Guilin,0.394591,0.217798,0.438704,0.716805,0.508289,0.703535,0.478202,0.271581,...,0.555065,0.464309,0.365828,0.494926,0.536736,0.444716,1.0,0.77393,0.467211,0.630851
3854,90_woman,Guangzhou,0.415459,0.366032,0.506132,0.681737,0.674111,0.696208,0.505006,0.287948,...,0.564218,0.527855,0.442714,0.393628,0.613958,0.419925,0.77393,1.0,0.410757,0.465176
3855,90_woman,Fuzhou,0.60837,0.658156,0.543154,0.546759,0.399213,0.480001,0.510561,0.564148,...,0.404269,0.562675,0.455055,0.468476,0.433323,0.547321,0.467211,0.410757,1.0,0.400142


In [72]:
# Distance between cities
data = pd.read_csv("/content/gdrive/My Drive/Data Science/languages.csv")
city_la_lo= data[['Name', 'Latitude', 'Longitude']]
# Initialize a new DataFrame to store the result
distance = pd.DataFrame(index=city_la_lo['Name'], columns=city_la_lo['Name'])
# Calculate the distance between each pair of cities
for i, city1 in city_la_lo.iterrows():
    coords_1 = (city1['Latitude'], city1['Longitude'])
    for j, city2 in city_la_lo.iterrows():
        coords_2 = (city2['Latitude'], city2['Longitude'])
        dis = great_circle(coords_1, coords_2).kilometers
        distance.at[city1['Name'], city2['Name']] = dis
distance

Name,Beijing,Changsha,Chengdu,Fuzhou,Guangzhou,Guilin,Ha_erbin,Jinan,Jixi,Loudi,Meixian,Nanchang,Nanjing,Rongcheng,Suzhou,Taiyuan,Wenzhou,Xi_an,Xiamen
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Beijing,0.0,1266.628201,1511.474249,1567.916447,1893.158891,1719.454965,1072.768366,369.201189,1119.986882,1410.755967,1733.244378,1249.162364,896.672829,631.736049,1031.156059,395.655855,1392.040046,897.285126,1727.191646
Changsha,1266.628201,0.0,887.818631,697.595988,636.588631,477.62868,2233.11245,950.377743,563.429342,159.076906,579.59291,284.455768,639.95292,1275.800796,765.053766,1003.441433,764.264969,706.977491,704.128618
Chengdu,1511.474249,887.818631,0.0,1579.77384,1217.542982,858.137122,2584.002326,1380.802271,1403.443375,835.73398,1364.764947,1167.24267,1384.544438,1848.529893,1563.643398,1117.928778,1646.168509,617.57906,1547.147358
Fuzhou,1567.916447,697.595988,1579.77384,0.0,721.148202,907.449977,2289.518411,1198.732722,448.107106,752.765729,393.093243,446.977568,673.191595,1263.032509,591.80972,1460.57379,254.602512,1342.433667,214.766896
Guangzhou,1893.158891,636.588631,1217.542982,721.148202,0.0,360.14346,2808.142047,1557.632292,954.329133,522.503493,329.121127,685.367712,1135.44467,1802.443498,1169.551508,1639.600624,947.925059,1294.047789,544.704783
Guilin,1719.454965,477.62868,858.137122,907.449977,360.14346,0.0,2708.864797,1421.410542,976.715781,319.397531,574.941767,671.685437,1100.607008,1750.605364,1194.208954,1414.867477,1078.88761,1004.589844,791.967187
Ha_erbin,1072.768366,2233.11245,2584.002326,2289.518411,2808.142047,2708.864797,0.0,1291.863006,1885.448193,2389.739848,2573.811483,2123.273618,1679.004461,1027.665222,1700.236041,1466.455982,2049.396129,1969.856071,2492.238383
Jinan,369.201189,950.377743,1380.802271,1198.732722,1557.632292,1421.410542,1291.863006,0.0,750.786002,1103.829936,1376.037381,894.16094,527.852409,485.792821,672.099689,426.882838,1026.270598,777.379717,1360.188166
Jixi,1119.986882,563.429342,1403.443375,448.107106,954.329133,976.715781,1885.448193,750.786002,0.0,698.697659,689.675819,305.308908,225.851159,861.645236,219.618525,1036.954055,307.866921,1017.132256,622.110633
Loudi,1410.755967,159.076906,835.73398,752.765729,522.503493,319.397531,2389.739848,1103.829936,698.697659,0.0,545.838861,399.881799,794.463225,1434.747453,908.400729,1127.259691,862.713859,775.510001,712.657043


The next step was to analyze whether the similarity of dialects was correlated with the distance between cities.