In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
from tqdm import tqdm

In [None]:
def merge_nutriments(arr, avg=False):
    """Sum (or average) per-nutrient quantities across several nutriment dicts.

    Parameters
    ----------
    arr : iterable of (dict or None)
        Each truthy element maps a nutrient name to a dict with at least the
        keys ``'quantity'`` (a number) and ``'unit'``. Falsy elements
        (``None``, ``{}``) are ignored.
    avg : bool, default False
        When true, every summed quantity is divided by the number of truthy
        elements in ``arr``; an element that lacks a nutrient therefore
        counts as contributing zero for it.

    Returns
    -------
    dict
        ``{nutrient_name: {'quantity': float, 'unit': unit}}`` with nutrient
        names in first-seen order. The unit is the one of the first element
        that contains the nutrient; quantities recorded in any other unit are
        left out of the sum (no unit conversion is attempted).
    """
    entries = [nutriments for nutriments in arr if nutriments]

    # A dict keeps the names in first-seen order, whereas a set of strings is
    # iterated in a hash-dependent order that can change from one kernel run
    # to the next and would reshuffle the printed report.
    nut_names = dict.fromkeys(
        nut for nutriments in entries for nut in nutriments
    )

    divisor = len(entries) if avg else 1.0

    merged = {}
    for nut_name in nut_names:
        # Never empty: every name was collected from at least one entry.
        values = [
            nutriments[nut_name]
            for nutriments in entries
            if nut_name in nutriments
        ]

        # The first occurrence fixes the reference unit, even when that unit
        # is falsy ('' or None). Testing truthiness here would let a later
        # entry silently replace the unit while the earlier quantity stays in
        # the sum.
        unit = values[0]['unit']
        quantity = sum(
            (value['quantity'] for value in values if value['unit'] == unit),
            0.0,
        )

        merged[nut_name] = {
            'quantity': quantity / divisor,
            'unit': unit,
        }

    return merged

In [None]:
def pretty_nutriments(nuts):
    """Print one line per nutrient: name, quantity and unit.

    ``nuts`` maps a nutrient name to a dict with ``'quantity'`` and ``'unit'``
    keys. The name is padded to 25 characters, the quantity is shown with two
    decimals, and a tab separates it from the unit. All lines are emitted by
    a single ``print`` call, so an empty mapping prints one blank line.
    """
    row_template = '{:25}{:.2f}\t{}'
    rows = []
    for name, info in nuts.items():
        rows.append(row_template.format(name, info['quantity'], info['unit']))
    print('\n'.join(rows))

In [None]:
# Elasticsearch connection shared by every query cell below.
# NOTE(review): 'http://' carries no host or port — presumably the address
# was stripped before sharing; fill it in before running the notebook.
client = Elasticsearch(hosts='http://')

In [None]:
# Materialise every hit that scan() yields for the 'analysis' index.
analysis = list(Search(using=client, index='analysis').scan())

In [None]:
# Lookup table from an analysis document's `id` attribute to the document.
# NOTE(review): the city cells query this table with a restaurant's `meta.id`;
# presumably `a.id` stores that same identifier — confirm.
restaurant_analysis = dict((a.id, a) for a in analysis)

In [None]:
# (restaurant, analysis) pairs for restaurants matching city='genève';
# restaurants without a truthy entry in restaurant_analysis are dropped.
geneva = [
    (restaurant, found)
    for restaurant, found in (
        (hit, restaurant_analysis.get(hit.meta.id))
        for hit in Search(using=client, index='restaurants').query('match', city='genève').scan()
    )
    if found
]
len(geneva)

In [None]:
# (restaurant, analysis) pairs for restaurants matching city='lausanne';
# restaurants without a truthy entry in restaurant_analysis are dropped.
lausanne = [
    (restaurant, found)
    for restaurant, found in (
        (hit, restaurant_analysis.get(hit.meta.id))
        for hit in Search(using=client, index='restaurants').query('match', city='lausanne').scan()
    )
    if found
]
len(lausanne)

In [None]:
# (restaurant, analysis) pairs for restaurants matching city='paris';
# restaurants without a truthy entry in restaurant_analysis are dropped.
paris = [
    (restaurant, found)
    for restaurant, found in (
        (hit, restaurant_analysis.get(hit.meta.id))
        for hit in Search(using=client, index='restaurants').query('match', city='paris').scan()
    )
    if found
]
len(paris)

In [None]:
# (restaurant, analysis) pairs for restaurants matching city='lyon';
# restaurants without a truthy entry in restaurant_analysis are dropped.
lyon = [
    (restaurant, found)
    for restaurant, found in (
        (hit, restaurant_analysis.get(hit.meta.id))
        for hit in Search(using=client, index='restaurants').query('match', city='lyon').scan()
    )
    if found
]
len(lyon)

In [None]:
# (restaurant, analysis) pairs for restaurants matching city='bordeaux';
# restaurants without a truthy entry in restaurant_analysis are dropped.
bordeaux = [
    (restaurant, found)
    for restaurant, found in (
        (hit, restaurant_analysis.get(hit.meta.id))
        for hit in Search(using=client, index='restaurants').query('match', city='bordeaux').scan()
    )
    if found
]
len(bordeaux)

In [None]:
# City label -> list of (restaurant, analysis) pairs built in the cells above.
cities = dict(
    geneva=geneva,
    lausanne=lausanne,
    paris=paris,
    lyon=lyon,
    bordeaux=bordeaux,
)

In [None]:
# Per-city report: city name, number of restaurants that contributed, then
# the nutriment totals averaged over those restaurants.
for city, group in cities.items():
    print(city)

    # Only restaurants whose analysis list is non-empty contribute a total.
    selector = []
    for rest, agg in group:
        if len(agg.analysis) == 0:
            continue
        selector.append(agg.total.to_dict())

    print(len(selector))
    pretty_nutriments(merge_nutriments(selector, avg=True))
    print()