In [60]:
import pandas as pd
import numpy as np
import json
from jsonToDF import extract_json

### Soybean oil

ID No. 946

### Vitamin K1 epoxide-1,4-diol

ID No. 23496

Each food has:
- all existing attributes from original data
- a list of its compounds.

Each compound has the following (a food–compound pair may have several content records, which are summarised as min/max/avg):
- all existing attributes from original data
- original content min
- original content max
- original content avg
- original unit
- citation (reference)

In [2]:
# Load the content table from 'Content.json' using the project helper extract_json.
# NOTE(review): presumably returns a pandas DataFrame (later cells call
# DataFrame methods such as .dropna on it) -- confirm against jsonToDF.
content = extract_json('Content.json')

In [48]:
# Discard every row that has no 'orig_content' value, then display the result.
has_orig_content = content['orig_content'].notna()
content = content[has_orig_content]
content

Unnamed: 0,id,source_id,source_type,food_id,orig_food_id,orig_food_common_name,orig_food_scientific_name,orig_food_part,orig_source_id,orig_source_name,...,citation_type,creator_id,updater_id,created_at,updated_at,orig_method,orig_unit_expression,standard_content,preparation_type,export
0,1,1,Nutrient,4,29,Kiwi,Actinidia chinensis PLANCHON [Actinidiaceae],Fruit,FAT,FAT,...,DATABASE,,,2014-11-05T13:42:11.000Z,2020-04-27T16:20:52.000Z,,,1955.0,raw,0
1,2,1,Nutrient,6,53,Onion,Allium cepa L. [Liliaceae],Bulb,FAT,FAT,...,DATABASE,,,2014-11-05T13:42:11.000Z,2020-04-27T16:20:52.000Z,,,1853.95,raw,0
2,3,1,Nutrient,6,53,Onion,Allium cepa L. [Liliaceae],Leaf,FAT,FAT,...,DATABASE,,,2014-11-05T13:42:11.000Z,2020-04-27T16:20:52.000Z,,,4150.0,raw,0
3,4,1,Nutrient,9,55,Chives,Allium schoenoprasum L. [Liliaceae],Leaf,FAT,FAT,...,DATABASE,,,2014-11-05T13:42:11.000Z,2020-04-27T16:20:52.000Z,,,3900.0,raw,0
4,5,1,Nutrient,11,70,Cashew,Anacardium occidentale L. [Anacardiaceae],Fruit,FAT,FAT,...,DATABASE,,,2014-11-05T13:42:11.000Z,2020-04-27T16:20:52.000Z,,,2500.0,other,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5691006,6180503,30807,Compound,268,,Jiaogulan beer,,,,,...,ARTICLE,,,2020-04-29T22:59:30.000Z,2020-04-29T22:59:30.000Z,,,1.08,"beverage, alcoholic",1
5691007,6180504,31057,Compound,268,,Guanshan beer,,,,,...,ARTICLE,,,2020-04-29T22:59:30.000Z,2020-04-29T22:59:30.000Z,,,0.27,"beverage, alcoholic",1
5691008,6180505,31057,Compound,268,,Jiaogulan beer,,,,,...,ARTICLE,,,2020-04-29T22:59:30.000Z,2020-04-29T22:59:30.000Z,,,0.54,"beverage, alcoholic",1
5691009,6180506,31026,Compound,268,,Guanshan beer,,,,,...,ARTICLE,,,2020-04-29T22:59:30.000Z,2020-04-29T22:59:30.000Z,,,0.05,"beverage, alcoholic",1


In [6]:
# Select the content records that link food 946 (soybean oil) to compound 23496.
is_soybean_oil = content['food_id'] == 946
is_target_compound = (content['source_type'] == 'Compound') & (content['source_id'] == 23496)
selected_columns = ['orig_content', 'orig_unit', 'citation']

# to_dict() with its default orientation yields {column: {row_index: value}}.
contents_dict = content.loc[is_soybean_oil & is_target_compound, selected_columns].to_dict()
contents_dict

{'orig_content': {5687474: '90.95', 5687576: '82.35'},
 'orig_unit': {5687474: 'mg/100 g', 5687576: 'mg/100 g'},
 'citation': {5687474: 'USDA', 5687576: 'USDA'}}

In [15]:
# The 'orig_content' values are strings (e.g. '90.95'); convert them to floats
# so they can be aggregated numerically.
content_nums = np.array([float(raw_value) for raw_value in contents_dict['orig_content'].values()])
content_nums

array([90.95, 82.35])

In [28]:
# Summarise the content records of this food-compound pair.
# Only one unit / citation is kept: the first element of the de-duplicated values.
unit_candidates = list(set(contents_dict['orig_unit'].values()))
citation_candidates = list(set(contents_dict['citation'].values()))
extras = {
    'content_min': content_nums.min(),
    'content_max': content_nums.max(),
    'content_avg': content_nums.mean(),
    'content_unit': unit_candidates[0],
    'reference': citation_candidates[0]
}

In [21]:
# Load the compound table from 'Compound.json' using the project helper extract_json.
# NOTE(review): presumably a pandas DataFrame with an 'id' column (it is filtered
# on compound['id'] in later cells) -- confirm against jsonToDF.
compound = extract_json('Compound.json')

In [31]:
# Fetch the compound row with id 23496 and turn it into a plain {column: value} dict.
# to_dict('index') maps row index -> row dict; the first (expected only) row is taken.
target_compound_rows = compound.loc[compound['id'] == 23496]
compound_entry = list(target_compound_rows.to_dict('index').values())[0]

In [32]:
# Merge the aggregated content statistics (min/max/avg, unit, reference) into the
# compound record in place, then display the enriched record.
compound_entry.update(extras)
compound_entry

{'id': 23496,
 'public_id': 'FDB023333',
 'name': 'Vitamin K1 epoxide-1,4-diol',
 'state': 'Liquid',
 'annotation_quality': 'low',
 'description': 'Vitamin K is a family of fat-soluble compounds with a common chemical structure, 2, methyl-1,4-napthoquinone. Phylloquinone is present in food of plant origin, such as green, leafy vegetables and certain plant oils, and is the predominant form in the diet. Bacterial and other forms of vitamin K, referred to as the menaquinones, differ in structure from phylloquinone in their 3-substituted lipophilic side chain. Menaquinone-4 (MK-4), which is alkylated from menadione, is present in animal feeds or is the product of tissue-specific conversion directly from dietary phylloquinone. Vitamin K is a cofactor specific to the formation of gamma-carboxyglutamyl (Gla) residues in certain proteins, including prothrombin necessary for normal hemostatic function. The naturally occurring forms of vitamin K are quinones (i.e. phylloquinone and menaquinones)

In [50]:
# Build one enriched compound record for every compound reported for soybean oil
# (food 946): the compound's own attributes plus min/max/avg content, unit and reference.

# Narrow the content table to this food's compound rows once, instead of
# re-scanning the whole table on every loop iteration.
food_contents = content[(content['food_id']==946) & (content['source_type']=='Compound')]
compound_ids = pd.unique(food_contents['source_id'])
compound_list = []
missing_compound_ids = []  # source_ids that have no matching row in `compound`
for cid in compound_ids:
    # Look the compound up first. Previously a failed lookup was swallowed by
    # `except IndexError: pass`; the code then updated and re-appended the dict
    # left over from the previous iteration, overwriting that compound's
    # statistics and duplicating it in the output.
    matching_compounds = compound[compound['id']==cid].to_dict('index')
    if not matching_compounds:
        missing_compound_ids.append(cid)
        continue
    compound_entry = list(matching_compounds.values())[0]

    # All content records for this food-compound pair, as {column: {row_index: value}}.
    contents_dict = food_contents[food_contents['source_id']==cid][
        ['orig_content','orig_unit','citation']].to_dict()
    content_nums = np.array([float(x) for x in contents_dict['orig_content'].values()])
    # NOTE(review): if one pair carries several distinct units or citations, only
    # one of them is kept and the statistics mix units -- confirm this is acceptable.
    extras = {
        'content_min': np.min(content_nums),
        'content_max': np.max(content_nums),
        'content_avg': np.mean(content_nums),
        'content_unit': list(set(contents_dict['orig_unit'].values()))[0],
        'reference': list(set(contents_dict['citation'].values()))[0]
    }
    compound_entry.update(extras)
    compound_list.append(compound_entry)

# Report skipped compounds explicitly rather than dropping them silently.
if missing_compound_ids:
    print('Skipped compound ids missing from the compound table:', missing_compound_ids)

In [55]:
# Load the food table from 'Food.json' using the project helper extract_json.
# NOTE(review): presumably a pandas DataFrame with an 'id' column (it is filtered
# on food['id'] in the next cell) -- confirm against jsonToDF.
food = extract_json('Food.json')

In [63]:
# Fetch the food row with id 946 (soybean oil) as a plain {column: value} dict.
soybean_rows = food.loc[food['id'] == 946]
soybean_oil = list(soybean_rows.to_dict('index').values())[0]

# Attach the enriched compound records built above and display the full record.
soybean_oil['compounds'] = compound_list
soybean_oil

{'id': 946,
 'name': 'Soybean oil',
 'name_scientific': None,
 'description': 'Soybean oil is a vegetable oil extracted from the seeds of the soybean (Glycine max). It is one of the most widely consumed cooking oils. As a drying oil, processed soybean oil is also used as a base for printing inks (soy ink) and oil paints.',
 'itis_id': None,
 'wikipedia_id': None,
 'picture_file_name': 'Soybean_oil.jpg',
 'picture_content_type': 'image/jpeg',
 'picture_file_size': 78352,
 'picture_updated_at': '2020-04-17T16:22:50.000Z',
 'legacy_id': None,
 'food_group': 'Herbs and Spices',
 'food_subgroup': 'Oilseed crops',
 'food_type': 'Type 2',
 'created_at': '2020-01-24T17:05:36.000Z',
 'updated_at': '2020-04-17T16:22:51.000Z',
 'creator_id': None,
 'updater_id': None,
 'export_to_afcdb': False,
 'category': None,
 'ncbi_taxonomy_id': None,
 'export_to_foodb': True,
 'public_id': 'FOOD00914',
 'compounds': [{'id': 14507,
   'public_id': 'FDB014504',
   'name': 'Folic acid',
   'state': 'Solid',
  

In [64]:
# Serialise first, so that a value that is not JSON-serialisable raises before the
# output file is opened (and truncated); then write it with a context manager so
# the handle is always closed.
soybean_oil_json = json.dumps(soybean_oil)
with open('soybean_oil.json', 'w') as f:
    f.write(soybean_oil_json)