In [1]:
import json

# Load every publication record up front. The context manager closes the
# file handle as soon as the JSON has been parsed (the bare open() used
# previously left it open for the life of the process).
with open("master.json", encoding="utf8") as f:
    publications = json.load(f)

# Module-level search indexes; the create_* functions fill these in place.
author_dict = {}   # author name key -> list of lower-cased titles
keyword_dict = {}  # lower-cased keyword -> list of lower-cased titles
journal_dict = {}  # lower-cased journal name -> list of lower-cased titles
title_list = []    # every lower-cased title, in file order
title_dict = {}    # lower-cased title -> {"year", "num_page", "corr"}

def create_author_dict(publications):
    """Index lower-cased paper titles by author.

    The key for an author is the lower-cased first name immediately followed
    by the lower-cased last name (no separator between them). Titles are
    appended to the module-level ``author_dict``, which is also returned.
    """
    for record in publications:
        for author in record["authors"]:
            name_key = author["fname"].lower() + author["lname"].lower()
            # setdefault creates the bucket on first sight of this author.
            author_dict.setdefault(name_key, []).append(
                record["paper"]["title"].lower()
            )
    return author_dict

def create_keyword_dict(publications):
    """Index lower-cased paper titles by lower-cased keyword.

    Titles are appended to the module-level ``keyword_dict``, which is also
    returned.
    """
    for record in publications:
        for keyword in record["paper"]["keywords"]:
            # One bucket per distinct keyword, created on first use.
            keyword_dict.setdefault(keyword.lower(), []).append(
                record["paper"]["title"].lower()
            )
    return keyword_dict

def create_journal_dict(publications):
    """Index lower-cased paper titles by lower-cased journal name.

    Titles are appended to the module-level ``journal_dict``, which is also
    returned.
    """
    for record in publications:
        journal_key = record["source"]["journal"].lower()
        # One bucket per distinct journal, created on first use.
        journal_dict.setdefault(journal_key, []).append(
            record["paper"]["title"].lower()
        )
    return journal_dict

def create_title_list(publications):
    """Append every paper title, lower-cased, to the module-level list.

    Returns the module-level ``title_list`` after it has been extended.
    """
    title_list.extend(record["paper"]["title"].lower() for record in publications)
    return title_list

def pages_error(publications):
    """Normalise the start/end page of every publication to integers.

    After this call each ``p["source"]["pages"]`` is a dict whose ``s_page``
    and ``e_page`` values are ``int``. If the pages entry is absent or not a
    dict, or either value is missing or cannot be converted with ``int()``,
    both values are set to 0.

    The records are modified in place and the same list is returned.
    """
    for p in publications:
        source = p["source"]
        pages = source.get("pages")
        if not isinstance(pages, dict):
            # Absent or null "pages": start from an empty mapping.
            pages = source["pages"] = {}
        try:
            start = int(pages["s_page"])
            end = int(pages["e_page"])
        except (KeyError, TypeError, ValueError):
            # Missing key, None, or a non-numeric string such as "e123".
            start = end = 0
        # Store the converted values so later page arithmetic works even
        # when the source data held numeric strings.
        pages["s_page"] = start
        pages["e_page"] = end
    return publications


def cauthor_error(publications):
    """Give every author record a ``cauthor`` entry.

    Authors that lack the key get ``cauthor = False``; existing values are
    left untouched. Records are modified in place and the same list is
    returned.
    """
    for record in publications:
        for author in record["authors"]:
            author.setdefault("cauthor", False)
    return publications

def corr(publications):
    """Flag each publication that has a corresponding author.

    ``p["corr"]`` is set to ``True`` when any author's ``cauthor`` value
    compares equal to ``True``. Otherwise an existing ``corr`` value is kept,
    or ``False`` is stored if there was none. Records are modified in place
    and the same list is returned.
    """
    for record in publications:
        # Equality (not truthiness) is deliberate: it mirrors the stored
        # flag semantics, so e.g. a non-empty string does not count.
        flags = [author["cauthor"] == True for author in record["authors"]]  # noqa: E712
        if any(flags):
            record["corr"] = True
        elif "corr" not in record:
            record["corr"] = False
    return publications

def create_title_dict(publications):
    """Build the per-title metadata used by the year/page/author filters.

    For each lower-cased title the module-level ``title_dict`` receives
    ``{"year", "num_page", "corr"}``, where ``num_page`` is
    ``e_page - s_page + 1``. When a title occurs more than once, the first
    occurrence wins. Returns ``title_dict``.
    """
    for record in publications:
        key = record["paper"]["title"].lower()
        if key in title_dict:
            continue  # keep the first record seen for a repeated title
        year = record["year"]
        pages = record["source"]["pages"]
        num_page = pages["e_page"] - pages["s_page"] + 1
        title_dict[key] = {"year": year, "num_page": num_page, "corr": record["corr"]}
    return title_dict


def sanitise_year():
    """Prompt until the user supplies a usable publication-year range.

    Accepted answers:

    * empty input     -> the full 1995-2021 span;
    * ``"2005"``      -> that single year;
    * ``"2000-2010"`` -> both end years included.

    Anything else (non-numeric text, more than two parts, or a start year
    greater than the end year) prints an error and asks again.

    Returns:
        list[int]: ``[start, stop]`` with ``stop`` exclusive, i.e. ready for
        ``range(start, stop)``, which is how ``pages_year_filter`` reads it.
    """
    while True:
        raw = input("Please enter a range for year between 1995 and 2021:").strip()
        if not raw:
            # No preference: cover 1995..2021 inclusive.
            return [1995, 2022]
        try:
            bounds = [int(part) for part in raw.split("-")]
        except ValueError:
            bounds = []
        if len(bounds) == 1:
            return [bounds[0], bounds[0] + 1]
        if len(bounds) == 2 and bounds[0] <= bounds[1]:
            # +1 so the year the user typed last is itself included.
            return [bounds[0], bounds[1] + 1]
        print("Please enter a valid range.")

def sanitise_pages():
    """Prompt until the user supplies a usable page-count range.

    Accepted answers:

    * empty input -> the full 0-99 span;
    * ``"12"``    -> exactly that many pages;
    * ``"5-20"``  -> both end values included.

    Anything else (non-numeric text, more than two parts, or a lower bound
    greater than the upper bound) prints an error and asks again.

    Returns:
        list[int]: ``[start, stop]`` with ``stop`` exclusive, i.e. ready for
        ``range(start, stop)``, which is how ``pages_year_filter`` reads it.
    """
    while True:
        raw = input("Please enter a range between 0 to 99 for the number of pages:").strip()
        if not raw:
            # No preference: cover 0..99 inclusive.
            return [0, 100]
        try:
            bounds = [int(part) for part in raw.split("-")]
        except ValueError:
            bounds = []
        if len(bounds) == 1:
            return [bounds[0], bounds[0] + 1]
        if len(bounds) == 2 and bounds[0] <= bounds[1]:
            # +1 so the upper value the user typed is itself included.
            return [bounds[0], bounds[1] + 1]
        print("Please enter a valid range.")

def sanitise_corr_author():
    """Ask whether results must have a corresponding author.

    Returns the list of acceptable ``corr`` flag values: ``[True]`` for
    "yes", ``[False]`` for "no" (case-insensitive), and ``[True, False]``
    for any other answer.
    """
    answer = input("Does the article need a corresponding author? Yes/No:").lower()
    if answer == "yes":
        return [True]
    if answer == "no":
        return [False]
    return [True, False]
            
def _prompt_terms(prompt):
    """Read one comma-separated field and return its cleaned, lower-cased terms."""
    terms = [term.strip() for term in input(prompt).lower().split(",")]
    # A skipped field is represented as [""], which is the sentinel the
    # search_* functions compare against.
    return [term for term in terms if term] or [""]


def menu():
    """Collect the search terms and filters from the user.

    Returns:
        tuple: ``(field_list, any_or_all, filter_list)`` where

        * ``field_list`` is ``[author, journal, title, keyword]``, each a list
          of lower-cased terms with surrounding whitespace removed (``[""]``
          when the field was skipped);
        * ``any_or_all`` is ``"all"`` or ``"any"`` (``"any"`` for a blank or
          unrecognised answer);
        * ``filter_list`` is ``[year_range, page_range, corr_flags]`` as
          returned by the ``sanitise_*`` helpers.
    """
    print(
        "To maximise the accuracy of your search, please provide as much "
        "information as you have for the following fields. However, you may "
        "skip if you do not have any requirements for the field. If you have "
        "more than 1 piece of information that you would like to include in "
        "each field, simply include a comma between them. Let's begin the "
        "search!"
    )
    author = _prompt_terms("First or last name of author:")
    journal = _prompt_terms("Name of journal:")
    title = _prompt_terms("Title of article:")
    keyword = _prompt_terms("Keyword(s):")
    field_list = [author, journal, title, keyword]  # note order

    any_or_all = input(
        "Do you want to include all or any of the above fields? All/Any:"
    ).strip().lower()
    if any_or_all not in ("all", "any"):
        # Assignment, not comparison: fall back to the permissive mode.
        any_or_all = "any"

    # Evaluated left to right, so the prompts appear as year, pages, author.
    filter_list = [sanitise_year(), sanitise_pages(), sanitise_corr_author()]
    return (field_list, any_or_all, filter_list)


def flatten(t):
    """Concatenate the items of every iterable in *t* into one flat list."""
    flat = []
    for sublist in t:
        flat.extend(sublist)
    return flat

def search(d, i):
    """Return the set of values stored under every key of *d* containing *i*.

    *d* maps string keys to lists; *i* is matched as a substring of each key.
    """
    matching_lists = []
    for key, titles in d.items():
        if i in key:
            matching_lists.append(titles)
    return set(flatten(matching_lists))

def search_author(field_list):
    """Return the titles written by any author matching any given term.

    Every non-blank term in *field_list* is matched as a substring of the
    keys of ``author_dict`` and the hits are unioned, so earlier terms are
    no longer overwritten by later ones. A skipped field (``[""]``) or an
    empty list yields an empty set.
    """
    matches = set()
    for term in field_list:
        term = term.strip()
        if term:  # a blank term would match every key
            matches |= search(author_dict, term)
    return matches

def search_journal(field_list):
    """Return the titles published in any journal matching any given term.

    Every non-blank term in *field_list* is matched as a substring of the
    keys of ``journal_dict`` and the hits are unioned, so earlier terms are
    no longer overwritten by later ones. A skipped field (``[""]``) or an
    empty list yields an empty set.
    """
    matches = set()
    for term in field_list:
        term = term.strip()
        if term:  # a blank term would match every key
            matches |= search(journal_dict, term)
    return matches

def search_title(field_list):
    """Return the titles that contain any of the given terms.

    Every non-blank term in *field_list* is matched as a substring of the
    entries of ``title_list`` and the hits are unioned, so earlier terms are
    no longer overwritten by later ones. A skipped field (``[""]``) or an
    empty list yields an empty set.
    """
    terms = [term.strip() for term in field_list]
    terms = [term for term in terms if term]  # a blank term would match everything
    return {title for title in title_list if any(term in title for term in terms)}

def search_keyword(field_list):
    """Return the titles tagged with any keyword matching any given term.

    Every non-blank term in *field_list* is matched as a substring of the
    keys of ``keyword_dict`` and the hits are unioned, so earlier terms are
    no longer overwritten by later ones. A skipped field (``[""]``) or an
    empty list yields an empty set.
    """
    matches = set()
    for term in field_list:
        term = term.strip()
        if term:  # a blank term would match every key
            matches |= search(keyword_dict, term)
    return matches

def to_map(f, n):
    """Apply the callable *f* to *n* and return the result.

    Lets ``map`` pair a sequence of functions with a sequence of arguments.
    """
    outcome = f(n)
    return outcome

def combine_search(field_list, any_or_all):
    """Run the per-field searches and merge their results.

    Args:
        field_list: ``[author, journal, title, keyword]`` term lists, in that
            order (the order must match the search functions below).
        any_or_all: ``"all"`` to keep only titles matched by every field the
            user filled in; any other value keeps titles matched by at least
            one field.

    Returns:
        set: the matching lower-cased titles. Empty when no field was filled
        in or nothing matched (previously this raised ``IndexError``).
    """
    searchers = [search_author, search_journal, search_title, search_keyword]
    # Only fields the user actually filled in take part. A filled-in field
    # with no hits stays in the list, so in "all" mode it correctly empties
    # the intersection instead of being ignored.
    active = [
        searcher(terms)
        for searcher, terms in zip(searchers, field_list)
        if any(term.strip() for term in terms)
    ]
    if not active:
        return set()
    if any_or_all == "all":
        return set.intersection(*active)
    return set.union(*active)

def pages_year_filter(title_dict, final, filter_list):
    """Print the titles in *final* that pass the year, page and author filters.

    Args:
        title_dict: maps each title to a dict with ``year``, ``num_page`` and
            ``corr`` entries.
        final: iterable of candidate titles.
        filter_list: ``[year_range, page_range, corr_flags]``; the two ranges
            are ``[start, stop]`` pairs with ``stop`` exclusive, and
            ``corr_flags`` lists the acceptable ``corr`` values.

    Prints a count line followed by one numbered, capitalised title per line.
    Returns ``None``.
    """

    def passes(title):
        entry = title_dict[title]
        checks = (
            entry["year"] in range(filter_list[0][0], filter_list[0][1]),
            entry["num_page"] in range(filter_list[1][0], filter_list[1][1]),
            entry["corr"] in filter_list[2],
        )
        return all(checks)

    fres = [title for title in final if passes(title)]
    print(f"There are {len(fres)} articles found:")
    for position, title in enumerate(fres, start=1):
        print(f"{position}.{title.capitalize()}")

def main():
    """Build the indexes, ask the user for criteria and print the matches."""
    # Each create_* call fills its module-level container in place, so the
    # return values do not need to be kept here.
    create_author_dict(publications)
    create_keyword_dict(publications)
    create_journal_dict(publications)
    create_title_list(publications)

    # The clean-up passes mutate the records in place and hand back the same
    # list; they must run before the title metadata is derived from them.
    pages_error(publications)
    cauthor_error(publications)
    cleaned = corr(publications)
    titles_meta = create_title_dict(cleaned)

    fields, mode, filters = menu()
    print("##################################################################################################################")
    matches = combine_search(fields, mode)
    pages_year_filter(titles_meta, matches, filters)


if __name__ == "__main__":
    main()



    

        


To maximise the accuracy of your search, please provide as much information as you have for the following fields. However, you may skip if you do not have any requirements for the field. If you have more than 1 piece of information that you would like to include in each field, simply include a comma between them. Let's begin the search!
First or last name of author:detenber
Name of journal:
Title of article:
Keyword(s):
Do you want to include all or any of the above fields? All/Any:All
Please enter a range for year between 1995 and 2021:1995-2020
Please enter a range between 0 to 99 for the number of pages:
Does the article need a corresponding author? Yes/No:No
##################################################################################################################
There are 12 articles found:
1.Getting to know you: exploring the development of relational intimacy in computer‐mediated communication
2.Roll 'em!: the effects of picture motion on emotional responses
3.The impact