<a href="https://colab.research.google.com/github/xh313/natural-class-selector/blob/main/natural_class_selector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
# Run this before anything
import pandas as pd
import matplotlib.pyplot as plt
import regex as re

def selector(df, features=None):
  """
  Narrow ``df`` down one feature at a time and return the surviving labels.

  Input the features you are looking for as a dictionary mapping a column
  name of ``df`` to the value that column must have, e.g.
  ``{'voice': '+', 'sonorant': '-'}``. Features are applied in the
  dictionary's iteration order, each one against the rows left over by the
  previous ones. For every feature one line is printed:

  * the feature matches every remaining row: it is reported as redundant
    and the rows are left untouched;
  * the feature matches no remaining row: this is reported and the function
    stops immediately, returning ``None``;
  * otherwise the rows are filtered and the remaining index is printed.

  Parameters:
    df: DataFrame whose columns are features and whose index holds the
      labels to select from.
    features: mapping of column name -> required value. ``None`` (the
      default) or an empty mapping applies no filter.

  Returns:
    The list of index labels left after all features were applied, or
    ``None`` if some feature matched nothing.

  Raises:
    KeyError: if a key of ``features`` is not a column of ``df``.
  """
  if features is None:
    features = {}

  for key in features:
    wanted = features[key]
    # Evaluate the comparison once and reuse it for all three outcomes.
    mask = df[key] == wanted

    if mask.all():
      # Every remaining row already has this value (also true for an
      # empty frame), so filtering would change nothing.
      print(f'The feature {wanted}{key} is redundant.')
    elif not mask.any():
      print(f'The feature {wanted}{key} returned no phonemes.')
      return None
    else:
      df = df[mask]
      print(f'Applied {wanted}{key}: {list(df.index)}')

  print('\n')
  return list(df.index)


# Minimally sufficient features to select
def minimally_sufficient(df, phones='s'):
  """
  Returns, for each requested phoneme, a set of feature values that
  selects out this unique phoneme.

  The search is greedy: the columns of ``df`` are visited in order and a
  feature is kept whenever the phoneme's value for it is not ``0`` and it
  excludes at least one of the rows still under consideration. The result
  is therefore sufficient to isolate the phoneme, but it depends on the
  column order and is not guaranteed to be the smallest possible set.

  Parameters:
    df: DataFrame whose columns are features and whose index holds the
      phoneme labels.
    phones: a single index label or a list of index labels.

  Returns:
    A dict mapping each phoneme to a ``{feature: value}`` dict usable as
    the ``features`` argument of ``selector``. A phoneme that cannot be
    isolated is left out of the result and a message is printed instead.

  Raises:
    KeyError: if a requested phoneme is not in the index of ``df``.
  """
  original = df
  selectors = dict()
  if not isinstance(phones, list):
    phones = [phones]  # listify the string
  for phone in phones:
    df = original
    featuresdict = dict()
    try:
      while len(df) != 1:
        narrowed = False
        for feature in list(df.columns):
          presence = df.loc[phone, feature]
          # A label that occurs more than once in the index makes
          # ``presence`` a Series; testing it raises ValueError, which is
          # reported as an ambiguity below.
          # NOTE(review): this compares against the integer 0 - if the
          # feature columns hold the string '0', those values are not
          # skipped here; confirm against the data.
          if presence != 0:
            matching = df[df[feature] == presence]
            # ``matching`` is a subset of ``df``, so a different length
            # means the feature excluded at least one candidate.
            if len(matching) != len(df):
              featuresdict[feature] = presence
              df = matching
              narrowed = True
        if not narrowed:
          # A whole pass over the columns removed nothing, so another pass
          # would not either: stop instead of looping forever.
          raise ValueError(f'no feature separates {phone} any further')
      selectors[phone] = featuresdict
    except ValueError:
      print('An ambiguity is present in the dataframe.')
  return selectors

In [72]:
# Data: load the feature chart and index it by phoneme
df = pd.read_csv(
    'https://github.com/xh313/natural-class-selector/raw/main/Features.csv'
)
df = df.set_index('phoneme')


# Selector example 1: spell the wanted features out by hand
selector(df, dict(
    syllabic='-',
    lateral='-',
    sonorant='-',
    # trill='+',  (optional extra constraint, disabled)
    continuant='+',
    anterior='+',
    voice='+',
))

# Selector example 2: feed in a feature dict generated automatically
selector(df, minimally_sufficient(df, ['ʃ', 's'])['ʃ'])

Applied -syllabic: ['ŋ+', 'ʟ', 'ɫ', 'ɴ', 'ʀ', 'ɲ', 'ʎ', 'ŋ', 'ŋ˗', 'ʟ', 'ʟ̠', 'ɳ', 'ʙ', 'ɭ', 'ɺ', 'ɻ', 'ɽ', 'r', 'n', 'm', 'l', 'ɾ', 'ɱ', 'ʔ', 'ɣ+', 'x+', 'k+', 'ɡ+', 'k+͡x+', 'ɡ+͡ɣ+', 'ħ', 'ʕ', 'ʁ', 'q', 'χ', 'ɢ', 'ɕ', 'ɉ', 'ʝ', 'c', 'ç', 'd͡ʑ', 't͡ɕ', 'ɣ', 'ɣ̠ ', 'x', 'x̠', 'k', 'k̠', 'ɡ', 'ɡ̠', 'ʑ', 'ʈ', 'ɖ', 'ɬ', 'ʐ', 'ɸ', 'ʂ', 'ʒ', 'z', 'v', 't', 'ʃ', 's', 'p', 'f', 'd', 'b', 'θ', 'ɮ', 'ð', 'β', 'd͡ʒ', 'd͡z', 'd͡ɮ', 'd̠͡ɮ̠', 't͡ʃ', 't̠͡ɬ̠', 't͡s', 't͡ɬ', 't̪͡s̪', 't̪͡ɬ̪', 'd̪͡z̪', 'd̪͡ɮ̪', 'ʈ͡ʂ', 'ɖ͡ʐ', 'p͡f', 'b͡v', 'p͡ɸ', 'b͡β', 't̪͡θ', 'd̪͡ð', 'c͡ç', 'ɉ͡ʝ', 'k͡x', 'k̠͡x̠', 'ɡ͡ɣ', 'ɡ̠̠͡ɣ̠', 'q͡χ', 'ɢ͡ʁ', 'ɧ', 'k͡p', 'g͡b', 'p͡t', 'b͡d', 'ɰ', 'ɰ̠', 'w', 'ɥ', 'j', 'ɹ', 'ʋ', 'ʍ', 'ɦ', 'h']
Applied -lateral: ['ŋ+', 'ɴ', 'ʀ', 'ɲ', 'ŋ', 'ŋ˗', 'ɳ', 'ʙ', 'ɻ', 'ɽ', 'r', 'n', 'm', 'ɾ', 'ɱ', 'ʔ', 'ɣ+', 'x+', 'k+', 'ɡ+', 'k+͡x+', 'ɡ+͡ɣ+', 'ħ', 'ʕ', 'ʁ', 'q', 'χ', 'ɢ', 'ɕ', 'ɉ', 'ʝ', 'c', 'ç', 'd͡ʑ', 't͡ɕ', 'ɣ', 'ɣ̠ ', 'x', 'x̠', 'k', 'k̠', 'ɡ', 'ɡ̠', 'ʑ', 'ʈ', 'ɖ', 'ʐ', 'ɸ', 'ʂ', 'ʒ', 'z', 

['ʃ']

In [79]:
# Show the generated selector dict for each of the two phonemes
minimally_sufficient(df, phones=['ʃ', 's'])

{'ʃ': {'syllabic': '-',
  'consonantal': '+',
  'sonorant': '-',
  'continuant': '+',
  'delayed release': '+',
  'voice': '-',
  'labial': '-',
  'coronal': '+',
  'anterior': '-',
  'distributed': '+',
  'strident': '+',
  'dorsal': '-'},
 's': {'syllabic': '-',
  'consonantal': '+',
  'sonorant': '-',
  'continuant': '+',
  'delayed release': '+',
  'voice': '-',
  'labial': '-',
  'coronal': '+',
  'anterior': '+',
  'distributed': '-',
  'strident': '+'}}

In [80]:
# Spreadsheet viewer: display the feature rows of the listed phonemes
df.loc[
    [
        'a', 'ħ', 'χ', 'ɕ', 'ç',
        'x', 'x̠', 'ɬ', 'ɸ', 'ʂ',
        'ʃ', 's', 'o', 'e', 'y',
    ],
    :,
]

Unnamed: 0_level_0,syllabic,stress,long,consonantal,sonorant,continuant,delayed release,approximant,tap,trill,...,anterior,distributed,strident,lateral,dorsal,high,low,front,back,tense
phoneme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,+,-,-,-,+,+,0,+,-,-,...,0,0,0,-,+,-,+,-,-,0
ħ,-,-,-,+,-,+,+,-,-,-,...,0,0,0,-,+,-,+,-,+,0
χ,-,-,-,+,-,+,+,-,-,-,...,0,0,0,-,+,-,-,-,+,0
ɕ,-,-,-,+,-,+,+,-,-,-,...,+,+,+,-,+,+,-,+,-,0
ç,-,-,-,+,-,+,+,-,-,-,...,-,+,-,-,+,+,-,+,-,0
x,-,-,-,+,-,+,+,-,-,-,...,0,0,0,-,+,+,-,0,0,0
x̠,-,-,-,+,-,+,+,-,-,-,...,0,0,0,-,+,+,-,-,+,0
ɬ,-,-,-,+,-,+,+,-,-,-,...,+,-,-,+,-,0,0,0,0,0
ɸ,-,-,-,+,-,+,+,-,-,-,...,0,0,0,-,-,0,0,0,0,0
ʂ,-,-,-,+,-,+,+,-,-,-,...,-,-,+,-,-,0,0,0,0,0
