In [None]:
import json
import pandas as pd
import os
import numpy as np
from concurrent.futures import ThreadPoolExecutor

## Load JSON (serial)

In [None]:
# Serial loader: read every file of every year directory and flatten it with
# pd.json_normalize. The per-file frames are collected in a list and
# concatenated once at the end, because calling pd.concat inside the loop
# re-copies all previously loaded rows on every iteration.
frames = []

# Loop through years from 2018 to 2023
for year in range(2018, 2024):
    directory = f"../Data 2018-2023/Project/{year}/"

    # Ensure the directory exists before proceeding
    if not os.path.isdir(directory):
        print(f"Directory {directory} does not exist.")
        continue

    # Every regular file in the year directory is treated as a JSON document
    # (no extension filter is applied). Sub-directories are skipped because
    # open() would fail on them. Sorted so files are processed in a stable order.
    json_files = sorted(
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    )

    for file_name in json_files:
        file_path = os.path.join(directory, file_name)
        print("Reading", file_path)

        try:
            with open(file_path, "r", encoding="utf-8") as file:
                data = json.load(file)
        except FileNotFoundError:
            # The file vanished between listing and opening
            print(f"File {file_path} does not exist.")
            continue
        except UnicodeDecodeError as e:
            print(f"UnicodeDecodeError while processing {file_path}: {e}")
            continue
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError in file {file_path}: {e}")
            continue

        frames.append(pd.json_normalize(data))

# pd.concat raises on an empty list, so fall back to an empty frame
raw_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

## Load JSON (multithreaded)

In [None]:
def load_to_dataframe(root_dir):
    """
    Load every file below ``root_dir`` as JSON and combine them in one DataFrame.

    The directory tree is walked recursively and each file is parsed on a
    thread pool. No extension filter is applied, so the tree is expected to
    contain only JSON documents. Files that cannot be decoded or parsed are
    reported on stdout and skipped instead of aborting the whole load.

    Parameters
    ----------
    root_dir : str or path-like
        Directory that is scanned recursively.

    Returns
    -------
    pandas.DataFrame
        One row per record produced by ``pd.json_normalize`` for each file,
        ordered by sorted file path. Empty if nothing could be loaded.
    """

    def find_json_files(root):
        """
        Recursively collect the paths of all files below ``root``, sorted.
        """
        found = []
        for dirpath, _, filenames in os.walk(root):
            found.extend(os.path.join(dirpath, name) for name in filenames)
        # os.walk order depends on the file system; sort for reproducible rows
        found.sort()
        return found

    def read_json_file(path):
        """
        Return the flattened records of one file, or [] if it is unreadable.
        """
        try:
            with open(path, "r", encoding="utf-8") as file:
                data = json.load(file)
        except UnicodeDecodeError as e:
            print(f"UnicodeDecodeError while processing {path}: {e}")
            return []
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError in file {path}: {e}")
            return []
        # Keep every record: a document whose top level is a list yields
        # several rows, and an empty one yields none (no IndexError).
        return pd.json_normalize(data).to_dict("records")

    paths = find_json_files(root_dir)

    # executor.map returns results in the same order as ``paths``
    with ThreadPoolExecutor() as executor:
        per_file_records = list(executor.map(read_json_file, paths))

    # Flatten the per-file lists; skipped files contribute nothing
    records = [record for file_records in per_file_records for record in file_records]
    return pd.DataFrame(records)

raw_data = load_to_dataframe("../Data 2018-2023/Project/") #change to your data directory
raw_data.shape

In [None]:
# Persist the combined raw frame at the path the "Data Exploration" section
# reads it back from ("data/raw_data.csv"). index=False keeps the row index
# out of the file so it does not return as an extra "Unnamed: 0" column.
os.makedirs("data", exist_ok=True)
raw_data.to_csv("data/raw_data.csv", index=False)

## Data Exploration

In [None]:
# Work on a copy so the loaded raw frame stays untouched
df = raw_data.copy()
df.shape

In [None]:
df = pd.read_csv("data/raw_data.csv")

In [None]:
df.columns

In [None]:
# Drop every column whose share of missing values is above the threshold
threshold = 0.5

# Fraction of null entries per column, computed once for the whole frame
null_share = df.isnull().mean()

# Largest null share over all columns (stays 0 when nothing is missing)
max_null = 0
for share in null_share.to_numpy():
    if share > max_null:
        max_null = share

too_sparse = [
    name
    for name, share in zip(null_share.index, null_share.to_numpy())
    if share > threshold
]
df.drop(columns=too_sparse, inplace=True)

print(max_null)

In [None]:
df.columns

In [None]:
# Remove useless columns

useless_cols = [
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@type",
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@state",
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@stage",  # just status
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@day",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@timestamp",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@month",  # just year is enough
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@day",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@month",
    "abstracts-retrieval-response.item.bibrecord.head.abstracts",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.@country",  # is short form of country
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-type.@code",  # just type
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-language.@xml:lang",  # short form of language
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle-abbrev",  # title not needed
    "abstracts-retrieval-response.item.bibrecord.head.source.website.ce:e-address.$",  # website
    "abstracts-retrieval-response.item.bibrecord.head.source.website.ce:e-address.@type",  # type of above
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagerange.@first",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagerange.@last",  # could be good if we do how much pages ref prediction
    "abstracts-retrieval-response.item.bibrecord.head.source.@type",  # just type
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle",  # just title
    "abstracts-retrieval-response.item.bibrecord.head.source.@srcid",  # id
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.month",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.year",  # redundant publish year
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.date-text.@xfab-added",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.date-text.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.day",  # just year is enough
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:initials", # useless first char name
    "abstracts-retrieval-response.item.bibrecord.item-info.copyright.$",
    "abstracts-retrieval-response.item.bibrecord.item-info.copyright.@type",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@day",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@timestamp",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@year",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@month",  # why it created in 2020 when it is 2018 file 💀
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.itemid",
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.ce:doi",  # ids
    "abstracts-retrieval-response.coredata.srctype",  # just type
    "abstracts-retrieval-response.coredata.eid",  # ids
    "abstracts-retrieval-response.coredata.prism:url",
    "abstracts-retrieval-response.coredata.subtypeDescription",
    "abstracts-retrieval-response.coredata.link",
    "abstracts-retrieval-response.coredata.source-id",
    "abstracts-retrieval-response.coredata.prism:endingPage",
    "abstracts-retrieval-response.coredata.openaccess",
    "abstracts-retrieval-response.coredata.openaccessFlag",  # not necessary
    # "abstracts-retrieval-response.coredata.prism:doi",
    "abstracts-retrieval-response.coredata.prism:startingPage",
    "abstracts-retrieval-response.coredata.subtype",
    "abstracts-retrieval-response.coredata.dc:identifier",  # also id
    "abstracts-retrieval-response.coredata.publishercopyright",
    # "abstracts-retrieval-response.coredata.dc:publisher",
    "abstracts-retrieval-response.language.@xml:lang",
    "abstracts-retrieval-response.authors.author",  # redundant author
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.@pui-match",  # what is this?
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.@has-funding-info",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-addon-generated-timestamp",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-addon-type",  # link
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.abstract-language.@xml:lang",  # redundant with language
    "abstracts-retrieval-response.item.bibrecord.head.source.translated-sourcetitle.$", # title
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.voliss.@volume",
    "abstracts-retrieval-response.item.bibrecord.head.source.issn",  # id?
    "abstracts-retrieval-response.coredata.dc:description",
    "abstracts-retrieval-response.coredata.prism:volume",
    "abstracts-retrieval-response.coredata.prism:issn",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-text",  # basically desc
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.postal-code",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.author-keywords.author-keyword",  # probably too many unique
    "abstracts-retrieval-response.authkeywords.author-keyword",  # probably too many unique
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.voliss.@issue",
    "abstracts-retrieval-response.coredata.prism:issueIdentifier",
    "abstracts-retrieval-response.coredata.prism:pageRange",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding", # we donot care funding
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.@complete",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant-text.$",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant-text.@xml:lang",
    "abstracts-retrieval-response.idxterms.mainterm",
    "abstracts-retrieval-response.item.bibrecord.head.source.translated-sourcetitle.@xml:lang",
]

# Drop only the listed columns that are still present. The null-share filter
# in the earlier cell removes columns based on the data, so a hard-coded name
# may already be gone and an unconditional drop would raise KeyError.
already_gone = [name for name in useless_cols if name not in df.columns]
if already_gone:
    print(f"Skipping {len(already_gone)} columns that are not in df: {already_gone}")
df.drop(columns=[name for name in useless_cols if name in df.columns], inplace=True)

In [None]:
df.columns

In [None]:
column_renaming = {
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@year": "date_delivered_year",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@year": "date_sort_year",
    "abstracts-retrieval-response.item.bibrecord.head.author-group": "author_group",
    "abstracts-retrieval-response.item.bibrecord.head.citation-title": "citation_title",
    # "abstracts-retrieval-response.item.bibrecord.head.abstracts": "abstracts",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.country": "affiliation_country",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.city": "affiliation_city",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.organization": "affiliation_organization",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:given-name": "corresponding_author_given_name",
    # "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:initials": "corresponding_author_initials",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:surname": "corresponding_author_surname",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:indexed-name": "corresponding_author_indexed_name",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-language.@language": "citation_language",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.abstract-language.@language": "abstract_language",
    "abstracts-retrieval-response.item.bibrecord.head.source.@country": "source_country",
    # "abstracts-retrieval-response.item.bibrecord.head.source.translated-sourcetitle.@xml:lang": "source_translated_title_lang",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationyear.@first": "source_publication_year",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.publishername": "source_publisher_name",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.classificationgroup.classifications": "classificationgroup",
    "abstracts-retrieval-response.item.bibrecord.item-info.dbcollection": "dbcollection",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.@refcount": "ref_count",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference": "reference",
    "abstracts-retrieval-response.affiliation": "affiliation",
    "abstracts-retrieval-response.coredata.prism:coverDate": "coverDate",
    "abstracts-retrieval-response.coredata.prism:aggregationType": "aggregationType",
    "abstracts-retrieval-response.coredata.dc:creator.author": "author",
    "abstracts-retrieval-response.coredata.prism:publicationName": "publicationName",
    "abstracts-retrieval-response.coredata.citedby-count": "citedby_count",
    # "abstracts-retrieval-response.coredata.subtype": "subtype",
    # "abstracts-retrieval-response.coredata.prism:pageRange": "pageRange",
    "abstracts-retrieval-response.coredata.prism:doi": "doi",
    "abstracts-retrieval-response.coredata.dc:title": "title",
    "abstracts-retrieval-response.subject-areas.subject-area": "subject_area",
    # "abstracts-retrieval-response.coredata.publishercopyright": "publishercopyright",
    "abstracts-retrieval-response.coredata.dc:publisher": "publisher",
    # "abstracts-retrieval-response.idxterms.mainterm": "mainterm",
}

df.rename(columns=column_renaming, inplace=True)

In [None]:
# Report the fraction of missing values for every remaining column
remaining_null_share = df.isnull().mean()
for name, share in zip(remaining_null_share.index, remaining_null_share.to_numpy()):
    print(name, share)

In [None]:
df.info()

## Combine Scraping

In [None]:
df_scraped = pd.read_csv("data/ref_cite_count.csv")
df_scopus_2024 = pd.read_csv("data/scopus_data.csv")

In [None]:
df_scraped.info()

In [None]:
df_scopus_2024.info()

In [None]:
# Step 1: Normalize the 'author' column by converting lists to strings
df['author_str'] = df['author'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)
df_scraped['author_str'] = df_scraped['author'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)

# Step 2: Keep exactly one scraped row per author key before merging.
# With a non-unique key the left merge emits several rows per df row, the
# merged frame gets longer than df, and the index-aligned fillna below would
# write values onto the wrong rows. Rows without a key are dropped because
# pandas matches NaN keys with each other.
# NOTE(review): the author alone cannot tell apart several papers by the same
# author; the first scraped row wins here. Confirm whether a stricter key
# is available in df_scraped.
scraped_unique = (
    df_scraped[['author_str', 'references_count', 'citations_count', 'doi']]
    .dropna(subset=['author_str'])
    .drop_duplicates(subset='author_str', keep='first')
)

# Step 3: Merge df with the de-duplicated scraped data on 'author_str'.
# validate fails loudly if the right side is ever non-unique again.
df_merged = df.merge(scraped_unique,
                     on='author_str',
                     how='left',
                     validate='many_to_one')

# merge returns a fresh 0..n-1 index; restore df's index so that fillna
# lines up row by row whatever index df carries
df_merged.index = df.index

# Only fill the gaps; values already present in df are kept
df['ref_count'] = df['ref_count'].fillna(df_merged['references_count'])
df['doi'] = df['doi'].fillna(df_merged['doi_y'])


# Drop the temporary 'author_str' column
df.drop(columns=['author_str'], inplace=True)

In [None]:
# Show how the two schemas differ before aligning them
only_in_df = set(df.columns) - set(df_scopus_2024.columns)
only_in_scopus = set(df_scopus_2024.columns) - set(df.columns)
print("Columns in df but not in df_scopus_2024:", only_in_df)
print("Columns in df_scopus_2024 but not in df:", only_in_scopus)

# Give df_scopus_2024 every column df has; columns it lacks are filled with
# the "Unknown" placeholder string (not NaN)
for name in df.columns:
    if name in df_scopus_2024.columns:
        continue
    df_scopus_2024[name] = "Unknown"

# Keep only df's columns, in df's order (extra scopus columns are discarded)
df_scopus_2024 = df_scopus_2024[df.columns]

# Stack both sources into one frame with a fresh index
df_combined = pd.concat([df, df_scopus_2024], ignore_index=True)


In [None]:
df_combined.info()

## Fill Null

In [None]:
df = df_combined

In [None]:
# convert to datetime
df['coverDate'] = pd.to_datetime(df['coverDate'], errors='coerce')

In [None]:
# Year columns: parse to numbers, anything unparseable becomes NaN
for year_col in ("source_publication_year", "date_sort_year", "date_delivered_year"):
    df[year_col] = pd.to_numeric(df[year_col], errors='coerce')

# Count columns: parse to numbers, replace missing values with the column
# mean, then cast to integer
for count_col in ("citedby_count", "ref_count"):
    parsed = pd.to_numeric(df[count_col], errors='coerce')
    df[count_col] = parsed.fillna(parsed.mean()).astype(int)

In [None]:
df.fillna('Unknown', inplace=True)

In [None]:
df.info()

In [None]:
df.to_csv('full_data.csv')

In [None]:
df['author_group']

## Preparing data for visualization

In [None]:
pretty_df = raw_data.copy()

def create_reference_column(series:pd.Series) -> list:
    """
    get the full text of every reference of one record as a list

    series: one row of the flattened raw frame
    returns: list of "ref-fulltext" values; [] when the row has no reference
    count or no usable reference data
    """
    prefix = "abstracts-retrieval-response.item.bibrecord.tail.bibliography"

    def is_missing(value):
        # pd.isna only gives a single yes/no for scalars; a list or dict is
        # real data and never counts as missing
        return not isinstance(value, (list, dict)) and pd.isna(value)

    references = series.loc[f"{prefix}.reference"]
    refcount = series.loc[f"{prefix}.@refcount"]

    # Value-based check: an identity test against np.nan misses NaN objects
    # that are not the np.nan singleton (e.g. values from a float column)
    if is_missing(refcount):
        # no reference
        return []

    if isinstance(references, list):
        # multiple reference
        return [reference.get("ref-fulltext") for reference in references]

    # single reference: its text is read from the flattened
    # "...reference.ref-fulltext" column, which may be absent or empty
    single_text = series.get(f"{prefix}.reference.ref-fulltext")
    if is_missing(single_text):
        return []
    return [single_text]

def get_asjc_code(series:pd.Series) -> list:
    """
    get all ASJC codes and convert to list

    series: one row of the flattened raw frame
    returns: the "@code" value of every subject-area entry; [] when the cell
    is empty or does not hold a list (e.g. NaN for a missing value)
    """
    classifications = series.loc["abstracts-retrieval-response.subject-areas.subject-area"]
    # A missing cell is NaN (a float), on which len() would raise TypeError
    if not isinstance(classifications, list):
        return []
    return [code["@code"] for code in classifications]

def create_authors_column(series:pd.Series):
    """
    collect the indexed name of every author of one record

    returns [] when the authors cell does not hold a list
    """
    authors_data = series.loc["abstracts-retrieval-response.authors.author"]
    if type(authors_data) is not list:
        return []
    # one entry per author
    indexed_names = []
    for author in authors_data:
        indexed_names.append(author["ce:indexed-name"])
    return indexed_names

def create_main_auth_column(series: pd.Series):
    """
    main author's indexed name

    series: one row of the flattened raw frame
    returns: "ce:indexed-name" of the first creator entry, or NaN when the
    creator cell is missing or empty
    """
    creators = series.loc["abstracts-retrieval-response.coredata.dc:creator.author"]
    # A missing cell is NaN, which cannot be indexed; report it as missing
    # instead of aborting the whole apply()
    if not isinstance(creators, list) or not creators:
        return np.nan
    return creators[0]["ce:indexed-name"]

def create_affiliation_country(series: pd.Series):
    """
    unique affiliation locations of one record, each formatted "{city}, {country}"

    series: one row of the flattened raw frame
    returns: list of distinct location strings in first-seen order, or NaN
    when the affiliation cell does not hold a list
    """
    affils = series.loc["abstracts-retrieval-response.affiliation"]
    if not isinstance(affils, list):
        return np.nan

    locations = []
    for affil in affils:
        # Values are pulled out before formatting: reusing double quotes
        # inside a double-quoted f-string is a SyntaxError before Python 3.12.
        # A missing or null city becomes "" instead of the text "None".
        city = affil.get("affiliation-city") or ""
        country = affil["affiliation-country"]
        locations.append(f"{city}, {country}")

    # dict.fromkeys removes duplicates while keeping a deterministic order
    return list(dict.fromkeys(locations))

# Derived columns, built row by row (axis=1) in this order:
#   references            -> references text (list(str))
#   ASJC_code             -> ASJC code (list(str))
#   authors               -> authors indexed name (list(str))
#   main_author           -> main author indexed name (str)
#   affiliations_country  -> affiliations country (list(str))
row_builders = {
    "references": create_reference_column,
    "ASJC_code": get_asjc_code,
    "authors": create_authors_column,
    "main_author": create_main_auth_column,
    "affiliations_country": create_affiliation_country,
}
for new_column, builder in row_builders.items():
    pretty_df[new_column] = pretty_df.apply(builder, axis=1)

# format: reference count (int); errors="ignore" leaves the column unchanged
# when the cast to int is not possible
refcount_source = pretty_df["abstracts-retrieval-response.item.bibrecord.tail.bibliography.@refcount"]
pretty_df["ref_count"] = refcount_source.astype(dtype=int , errors="ignore")

# Short names for the raw columns that are kept
rename_map = {
    "abstracts-retrieval-response.coredata.dc:title" : "title",
    "abstracts-retrieval-response.coredata.citedby-count" : "citedby_count",
    "abstracts-retrieval-response.coredata.prism:publicationName" : "journal_title",
    "abstracts-retrieval-response.coredata.prism:issn" : "issn",
    "abstracts-retrieval-response.coredata.eid" : "eid",
    "abstracts-retrieval-response.coredata.prism:doi" : "doi",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency-country" : "funding_agency_country",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.@has-funding-info" : "is_funding",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@year" : "year",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@month" : "month",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@day" : "day",
}
pretty_df.rename(columns=rename_map, inplace=True)

# convert to int (0-1): a missing funding flag is treated as "0"
pretty_df["is_funding"] = pretty_df["is_funding"].fillna("0").astype(int)

# Assemble the delivery date from its year / month / day columns
date_parts = pretty_df[['year', 'month', 'day']]
pretty_df["delivered_date"] = pd.to_datetime(date_parts)

In [None]:
to_remove = [
    "year",
    "month",
    "day",
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@state",
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@type",
    "abstracts-retrieval-response.item.ait:process-info.ait:status.@stage",
    # "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@day",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@timestamp",
    # "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@year",
    # "abstracts-retrieval-response.item.ait:process-info.ait:date-delivered.@month",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@day",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@year",
    "abstracts-retrieval-response.item.ait:process-info.ait:date-sort.@month",
    "abstracts-retrieval-response.item.bibrecord.head.author-group",
    "abstracts-retrieval-response.item.bibrecord.head.citation-title",
    "abstracts-retrieval-response.item.bibrecord.head.abstracts",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.country",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.city",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.organization",
    # "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:given-name",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:initials",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:degrees",
    # "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:surname",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:indexed-name",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-type.@code",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-language.@language",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-language.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle-abbrev",
    "abstracts-retrieval-response.item.bibrecord.head.source.website.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.website.ce:e-address.@type",
    "abstracts-retrieval-response.item.bibrecord.head.source.@country",
    "abstracts-retrieval-response.item.bibrecord.head.source.translated-sourcetitle.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagerange.@first",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagerange.@last",
    "abstracts-retrieval-response.item.bibrecord.head.source.@type",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationyear.@first",
    "abstracts-retrieval-response.item.bibrecord.head.source.isbn",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.publishername",
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle",
    "abstracts-retrieval-response.item.bibrecord.head.source.@srcid",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.month",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.year",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.date-text.@xfab-added",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.date-text.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.day",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.classificationgroup.classifications",
    "abstracts-retrieval-response.item.bibrecord.item-info.copyright.$",
    "abstracts-retrieval-response.item.bibrecord.item-info.copyright.@type",
    "abstracts-retrieval-response.item.bibrecord.item-info.dbcollection",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@day",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@timestamp",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@year",
    "abstracts-retrieval-response.item.bibrecord.item-info.history.date-created.@month",
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.itemid",
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.ce:doi",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.@refcount",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference",
    "abstracts-retrieval-response.affiliation",
    "abstracts-retrieval-response.coredata.srctype",
    # "abstracts-retrieval-response.coredata.eid",
    "abstracts-retrieval-response.coredata.prism:coverDate",
    "abstracts-retrieval-response.coredata.prism:aggregationType",
    "abstracts-retrieval-response.coredata.prism:url",
    "abstracts-retrieval-response.coredata.subtypeDescription",
    "abstracts-retrieval-response.coredata.dc:creator.author",
    "abstracts-retrieval-response.coredata.link",
    "abstracts-retrieval-response.coredata.prism:isbn",
    # "abstracts-retrieval-response.coredata.prism:publicationName",
    "abstracts-retrieval-response.coredata.source-id",
    # "abstracts-retrieval-response.coredata.citedby-count",
    "abstracts-retrieval-response.coredata.subtype",
    "abstracts-retrieval-response.coredata.prism:pageRange",
    # "abstracts-retrieval-response.coredata.dc:title",
    "abstracts-retrieval-response.coredata.prism:endingPage",
    "abstracts-retrieval-response.coredata.openaccess",
    "abstracts-retrieval-response.coredata.openaccessFlag",
    # "abstracts-retrieval-response.coredata.prism:doi",
    "abstracts-retrieval-response.coredata.prism:startingPage",
    "abstracts-retrieval-response.coredata.dc:identifier",
    "abstracts-retrieval-response.coredata.dc:publisher",
    "abstracts-retrieval-response.idxterms",
    "abstracts-retrieval-response.language.@xml:lang",
    "abstracts-retrieval-response.authkeywords",
    "abstracts-retrieval-response.subject-areas.subject-area",
    "abstracts-retrieval-response.authors.author",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.@pui-match",
    # "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.@has-funding-info",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-addon-generated-timestamp",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-addon-type",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.country",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.postal-code",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.@afid",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.city",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.organization",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.affiliation-id.@afid",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.affiliation-id.@dptid",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.@dptid",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.author",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.abstract-language.@language",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.abstract-language.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.source.translated-sourcetitle.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.voliss.@volume",
    "abstracts-retrieval-response.item.bibrecord.head.source.isbn.@level",
    "abstracts-retrieval-response.item.bibrecord.head.source.isbn.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.isbn.@type",
    "abstracts-retrieval-response.item.bibrecord.head.source.isbn.@length",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confpublication.procpartno",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confname",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confcatnumber",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confseriestitle",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conflocation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conflocation.city",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confcode",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.enddate.@day",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.enddate.@year",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.enddate.@month",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.startdate.@day",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.startdate.@year",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confdate.startdate.@month",
    "abstracts-retrieval-response.item.bibrecord.head.source.issuetitle",
    "abstracts-retrieval-response.item.bibrecord.head.source.issn",
    "abstracts-retrieval-response.item.bibrecord.head.source.article-number",
    "abstracts-retrieval-response.affiliation.affiliation-city",
    "abstracts-retrieval-response.affiliation.@id",
    "abstracts-retrieval-response.affiliation.affilname",
    "abstracts-retrieval-response.affiliation.@href",
    "abstracts-retrieval-response.affiliation.affiliation-country",
    "abstracts-retrieval-response.coredata.dc:description",
    "abstracts-retrieval-response.coredata.prism:volume",
    # "abstracts-retrieval-response.coredata.prism:issn",
    "abstracts-retrieval-response.coredata.publishercopyright",
    "abstracts-retrieval-response.coredata.article-number",
    "abstracts-retrieval-response.idxterms.mainterm",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-text",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.postal-code",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.author-keywords.author-keyword",
    "abstracts-retrieval-response.item.bibrecord.head.source.publicationdate.date-text",
    "abstracts-retrieval-response.item.bibrecord.head.source.codencode",
    "abstracts-retrieval-response.item.bibrecord.head.source.issn.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.issn.@type",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.@complete",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant-text.$",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant-text.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant",
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.ce:pii",
    "abstracts-retrieval-response.coredata.pii",
    "abstracts-retrieval-response.authkeywords.author-keyword",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.state",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals.@source",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals.chemical",
    "abstracts-retrieval-response.item.bibrecord.item-info.external-source",
    "abstracts-retrieval-response.coredata.pubmed-id",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.affiliation-id",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.abstract-language",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.voliss.@issue",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.ce:e-address.@type",
    "abstracts-retrieval-response.coredata.prism:issueIdentifier",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.address-part",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confnumber",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conflocation.venue",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-acronym",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-agency.@iso-code",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-agency.$",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-agency-id",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conforganization",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conftheme",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confsponsors.confsponsor",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confsponsors.@complete",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency-matched-string",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency-acronym",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-id",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency-id",
    # "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding.xocs:funding-agency-country",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.address-part",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-id",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-fulltext",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.@id",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-publicationyear.@first",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-title.ref-titletext",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.refd-itemidlist.itemid",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-volisspag.voliss.@volume",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-text",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-authors.author",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-authors.et-al",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-sourcetitle",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.affiliation.address-part",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.affiliation.postal-code",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.affiliation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.affiliation.city",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.itemid.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.itemid.@idtype",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-title",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.ce:pii",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.author-keywords.author-keyword",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.citation-type.@code",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.citation-language.@language",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.citation-language.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.abstract-language.@language",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.abstract-language.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.codencode",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.sourcetitle-abbrev",
    # "abstracts-retrieval-response.item.bibrecord.head.related-item.source.@country",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.translated-sourcetitle.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.translated-sourcetitle.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.issn.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.issn.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.volisspag.voliss.@volume",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.volisspag.pagerange.@first",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.volisspag.pagerange.@last",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publicationyear.@first",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.publishername",
    # "abstracts-retrieval-response.item.bibrecord.head.related-item.source.sourcetitle",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publicationdate.month",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publicationdate.year",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publicationdate.day",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.ce:doi",
    "abstracts-retrieval-response.item.bibrecord.tail",
    "abstracts-retrieval-response.language",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:given-name",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.@seq",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:initials",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:surname",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.@role",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:indexed-name",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.confURL",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.ce:source-text",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.ce:source-text",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals.chemical.chemical-name",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.@affiliation-instance-id",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.@author-instance-id",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.@affiliation-instance-id",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.manufacturergroup.manufacturers.@type",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.manufacturergroup.manufacturers.manufacturer",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.state",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.website.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.website.ce:e-address.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.volisspag.voliss.@issue",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publicationdate.date-text",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.ce:e-address.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.article-number",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.manufacturergroup.manufacturers.manufacturer.@country",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.manufacturergroup.manufacturers.manufacturer.$",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.sequencebanks.sequencebank.@name",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.sequencebanks.sequencebank.@complete",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.sequencebanks.sequencebank.sequence-number",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.affiliation.organization.$",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.organization.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.preferred-sourcetitle",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.trademanuitem.tradename",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.trademanuitem.manufacturer.@country",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.trademanuitem.manufacturer.$",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.@type",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.issn",
    "abstracts-retrieval-response.item.bibrecord.head.source.website",
    "abstracts-retrieval-response.item.bibrecord.head.source.website.websitename",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-source-document.@source-document-type",
    "abstracts-retrieval-response.item.xocs:meta.xocs:funding-list.xocs:funding-source-document.$",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals.chemical.cas-registry-number",
    "abstracts-retrieval-response.item.bibrecord.head.source.part",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.affiliation.address-part",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.affiliation.postal-code",
    # "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.affiliation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.publisher.affiliation.city",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conflocation.state",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:degrees",
    "abstracts-retrieval-response.item.bibrecord.item-info.itemidlist.ce:ern",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.trademanuitem",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.manufacturergroup.manufacturers",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.supplement",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.refd-itemidlist.itemid.$",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.refd-itemidlist.itemid.@idtype",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-volisspag.pagerange.@first",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.sequencebanks.sequencebank.sequence-number.$",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.sequencebanks.sequencebank.sequence-number.@type",
    "abstracts-retrieval-response.item.bibrecord.head.grantlist.grant.grant-agency",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence",
    "abstracts-retrieval-response.item.bibrecord.head.source.volumetitle",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.author-keywords.author-keyword.$",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.author-keywords.author-keyword.@xml:lang",
    "abstracts-retrieval-response.authkeywords.author-keyword.@_fa",
    "abstracts-retrieval-response.authkeywords.author-keyword.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.publisher.affiliation.state",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:suffix",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.@date-locked",
    "abstracts-retrieval-response.item.bibrecord.head.author-group.affiliation.ce:text",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.chemicalgroup.chemicals.chemical.enzyme-commission-number",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.reportinfo.reportnumber",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.tradenamegroup.tradenames.trademanuitem.manufacturer",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagecount.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pagecount.@type",
    "abstracts-retrieval-response.idxterms.mainterm.$",
    "abstracts-retrieval-response.idxterms.mainterm.@weight",
    "abstracts-retrieval-response.idxterms.mainterm.@candidate",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-volisspag.voliss.@issue",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-volisspag.pagerange.@last",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.citation-language",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.ce:ern",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.descriptorgroup.descriptors.@controlled",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.descriptorgroup.descriptors.@type",
    "abstracts-retrieval-response.item.bibrecord.head.enhancement.descriptorgroup.descriptors.descriptor",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.affiliation.@affiliation-instance-id",
    "abstracts-retrieval-response.item.bibrecord.head.source.additional-srcinfo.conferenceinfo.confevent.conflocation.address-part",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-website.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ref-info.ref-website.ce:e-address.@type",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.ce:source-text",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:alt-name",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.@aii:was-generated-by",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor",
    "abstracts-retrieval-response.item.bibrecord.head.citation-info.author-keywords.author-keyword.@original",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:e-address.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.contributor.ce:e-address.@type",
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle.@xfab-added",
    "abstracts-retrieval-response.item.bibrecord.head.source.sourcetitle.$",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.affiliation.address-part",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.affiliation.@country",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.affiliation.city",
    "abstracts-retrieval-response.item.bibrecord.head.source.contributor-group.affiliation.organization.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.part",
    "abstracts-retrieval-response.item.bibrecord.head.source.volisspag.pages",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.author-keywords.author-keyword.$",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.citation-info.author-keywords.author-keyword.@xml:lang",
    "abstracts-retrieval-response.item.bibrecord.head.correspondence.person.ce:alias",
    "abstracts-retrieval-response.item.bibrecord.head.related-item.source.bib-text",
    "abstracts-retrieval-response.item.bibrecord.tail.bibliography.reference.@reference-instance-id",
]

# Build a slimmer copy of the frame without the columns listed in `to_remove`;
# `pretty_df` itself is left untouched because `drop` returns a new DataFrame.
resize_df = pretty_df.drop(labels=to_remove, axis="columns")

In [None]:
# Persist the trimmed frame as gzip-compressed Parquet.
# Original author's note: this is a compressed file format that Streamlit can handle.
resize_df.to_parquet(
    "viz_data.parquet.gzip",
    compression="gzip",
)