In [16]:
import pandas as pd
from fast_autocomplete import AutoComplete
from typing import Dict, Tuple
import json

In [2]:
## helper functions
def load_searchInfo(file_info: str):
    df = pd.read_csv(file_info, sep='\t', index_col=0)
    words = {}
    for key, row in df.iterrows():
        words.update({key: {'key':key}})
        words.update({row['gene']: {'key':key}})
        words.update({row['product']: {'key':key}})
        
        # skip empty note
        if row['note'] == '<note>':
            continue
            
        for note in str(row['note']).split(';'):
            words.update({note: {'key':key}})
    return df, words    

In [14]:
# read search terms
file_info = '../data/tableInfo.tsv'
df, words = load_searchInfo(file_info)
print(df.columns)
print(df.loc['P11798'])

Index(['gene', 'product', 'note', 'count'], dtype='object')
gene                                                  Camk2a
product    Calcium/calmodulin-dependent protein kinase ty...
note       CaM kinase II subunit alpha;CaMK-II subunit alpha
count                                                      0
Name: P11798, dtype: object


In [17]:
## interact with JSON dictionary
def autocomplete_export(words: Dict[str, Dict[str, str]], file_json: str):
    with open(file_json, "w") as fp:
        json.dump(words , fp)


def autocomplete_load(file_json: str) -> Dict[str, Dict[str, str]]:
    with open(file_json, 'r') as fp:
        words = json.load(fp)
    return words


autocomplete_export(words, '../data/autocomplete_info.json')
test = autocomplete_load('../data/autocomplete_info.json')
print(test['Camk2a'])

{'key': 'P11798'}


In [26]:
autocomplete = AutoComplete(words=words)
print(autocomplete.search(word='Camk', max_cost=3, size=10))
print(autocomplete.words['Camk2a'])
#print(autocomplete.get_count_of_word('Camk2a'))
new_count = autocomplete.get_count_of_word('Camk2a') + 1
autocomplete.update_count_of_word(word='Camk2a', count=new_count)
key = autocomplete.words['Camk2a']['key']
df.loc[key]['count'] = new_count
print(df.loc[key]['count'])

#print(autocomplete.get_count_of_word('Camk2a'))
#print(autocomplete.words['Camk2a']['key'])

[['Camkv'], ['CaMKN'], ['Camk4'], ['Camk1'], ['CaMK IV'], ['CaMKK 2'], ['CaMKK 1'], ['CaMKP-N'], ['Camk2g'], ['Camk2d']]
{'key': 'P11798'}
0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[key]['count'] = new_count


In [34]:
# test sorting
query = autocomplete.search(word='CALM', max_cost=3, size=10)
list = []
for sublist in query:
    list.append(sublist[-1])
#query = [item for sublist in query for item in sublist]
print(list)
#autocomplete_export(autocomplete.words, '../data/autocomplete_info_test.json')

['CALM', 'Calml3', 'Calmegin', 'Calmodulin-like protein 3', 'Calmodulin-binding protein P-57', 'Calmodulin regulator protein PCP4', 'Calmodulin-dependent calcineurin A subunit beta isoform', 'Calmodulin-regulated spectrin-associated protein 3']
