In [30]:
import pandas as pd
from jinja2 import Template, Environment, FileSystemLoader
from fuzzywuzzy import fuzz
from fuzzywuzzy import process


class FoodLabel(object):

    _normovane = ["n_lipid",
                  "n_saturated",
                  "n_sacharides",
                  "n_sugar",
                  "n_proteins",
                  "n_salt"]

    _product_attributes = \
        ['lipid',
         'saturated',
         'sacharides',
         'sugar',
         'protein',
         'salt']

    name_sk = ["Tuk", 
               "Nenásytené mastné kyseliny", 
               "Sacharidy", 
               "Cukry",
               "Bielkoviny", 
               "Soľ"]

    def __init__(self, hundred_grams=100,
                 xlsx_filename='list.xlsx',
                 sheet_name='corn_bageta',
                 product_sheet_name='products',
                 template_dir='templates',
                 html_file='main.html',
                 product_name='<Produkt>'):

        self.hundred_grams = hundred_grams
        self.xlsx_filename = xlsx_filename
        self.sheet_name = sheet_name
        self.product_sheet_name = product_sheet_name
        self.template_dir = template_dir
        self.html_file = html_file
        self.product_name = product_name

        self.jenv = Environment(loader=FileSystemLoader(self.template_dir))
        self.template = self.jenv.get_template('main.tpl')

    def load_products_from_file(self, _file_name=None, _sheet_name=None):
        _file_name = _file_name or self.xlsx_filename
        _sheet_name = _sheet_name or self.sheet_name
        xl = pd.ExcelFile(_file_name)
        _data_frame = xl.parse(_sheet_name)
        return _data_frame

    def get_products_from_file(self):
        description = self.load_products_from_file(self.xlsx_filename, self.sheet_name)
        data_frame = self.load_products_from_file(self.xlsx_filename, self.sheet_name)
        product_list = self.load_products_from_file(self.xlsx_filename,
                                                          self.product_sheet_name)
        '''
        calculating sum [g] (weight) of the product e.g 'bageta' 
        this function returns three data frames:
            - _desc   - this will be removed shortly
            - df
            - look_for_desc
        doesn't do much - it only calls method 3x: load_products_from_file
        '''
        return data_frame, description, product_list

    def calculate(self):
        """
        working_frame = data_frame
        product_description = description
        product_list_data = product_list
        :return:
        """
        working_frame, product_description, product_list_data = \
            self.get_products_from_file()

        _sum_of_attributes = []
        _x_hundred_grams = []

        for _n, _a in zip(FoodLabel._normovane,
                          FoodLabel._product_attributes):
            working_frame[_n] = \
                working_frame[_a].astype(float) * \
                working_frame['weight'].astype(float) / float(self.hundred_grams)

        _compare = pd.DataFrame()
        _compare = pd.merge(working_frame, 
                            product_list_data, 
                            left_on='_products', 
                            right_on='id_product')
        _dict_compare = dict(zip(_compare['id_product'], _compare['id_product_description']))
        
        
        _dict_pld = dict(zip(product_list_data['id_product'], product_list_data['id_product_description']))
        #assign_description = lambda x: _dict_compare[x] if x in _dict_compare else 'No_match'
        assign_description = lambda x: process.extractOne(x, _dict_pld.keys())
        
        working_frame['desc_from_library'] = 0
        working_frame['desc_from_library'] = working_frame['_products'].map(assign_description)

        product_weight = working_frame['weight'].sum()

        for _nr in FoodLabel._normovane:
            _soa = working_frame[_nr].sum()
            _sum_of_attributes.append(round(_soa, 1))
            
            _tmp_hundred_g = \
                self.hundred_grams * working_frame[_nr].sum() / product_weight
            _x_hundred_grams.append(_tmp_hundred_g)

        calculated_product_values = pd.DataFrame()
        calculated_product_values['name'] = \
            pd.Series(FoodLabel._product_attributes)
        calculated_product_values['sums of attributes'] = pd.Series(
            _sum_of_attributes)

        calculated_product_values['per 100g'] = pd.Series(_x_hundred_grams)
        format_calculus = lambda x: '{0:.1f}'.format(round(float(x), 1))
        calculated_product_values['per 100g'] = \
            calculated_product_values['per 100g'].map(format_calculus)

        calculated_product_values['name_sk'] = pd.Series(FoodLabel.name_sk)
        '''
                creating temporary pandas data-frame: temp
                and setting index to 'name' column so it will
                be easier to locate and calculate:
                    - lipid
                    - sacharides
                    - protein

                '''
        temp_var = calculated_product_values
        temp_var = temp_var.set_index("name")

        lipid = float(temp_var.loc['lipid', 'per 100g'])
        sacharid = float(temp_var.loc['sacharides', 'per 100g'])
        protein = float(temp_var.loc['protein', 'per 100g'])

        kj = int((17 * protein) + (37 * lipid) + (17 * sacharid))
        kcal = int((4 * protein) + (9 * lipid) + (4 * sacharid))
        kj = str(kj).replace('.', ',')
        kcal = str(kcal).replace('.', ',')

        format_repace = lambda x: str(x).replace('.', ',')
        calculated_product_values['per 100g'] = \
            calculated_product_values['per 100g'].map(format_repace)

        en_value_dict = \
            dict(zip(calculated_product_values['name_sk'],
                     calculated_product_values['per 100g']))

        return dict(items=working_frame,
                    en_value=en_value_dict,
                    kj=kj,
                    kcal=kcal,
                    total_product_weight=product_weight,
                    product_name=self.product_name,
                    dict_compare=_dict_compare)

    def render_with_jinja(self):
        content = self.calculate()
        _output = self.template.render(**content)
        with(open(self.html_file, encoding='utf8', mode='w')) as f:
            f.write(_output)
        return _output


css_51 = FoodLabel(sheet_name='css_51',
                   html_file='css_51.html',
                   product_name='css_51')
css_51.render_with_jinja()

css_52 = FoodLabel(sheet_name='css_52',
                   html_file='css_52.html',
                   product_name='css_52')
css_52.render_with_jinja()

css_53 = FoodLabel(sheet_name='css_53',
                   html_file='css_53.html',
                   product_name='css_53')
css_53.render_with_jinja()

css_54 = FoodLabel(sheet_name='css_54',
                   html_file='css_54.html',
                   product_name='css_54')
css_54.render_with_jinja()
    


'<!DOCTYPE html>\n<html>\n<head lang="sk">\n<meta charset="utf-8">\n<title>css_54</title>\n</head>\n<style>\n\n.main {\n\nwidth: 58mm;\nheight: 195mm;\ndisplay: inline-block;\nmargin: 0;\nvertical-align: middle;\nborder: 1px solid black;\ntext-align: justify;\npadding: 0 1mm;\nbox-sizing: border-box;\n}\n\n.padding {\n    padding: 5px;\n}\n#image {  \nwidth: 20mm;  \nheight: auto; \nborder: 1px solid black;\nalign: middle;\n} \n\n\n.regtext {\n\nfont-size: 0.7em;\nfont-family:Calibri, sans-serif; \nfont-style:normal; \nfont-variant:; \nfont-weight:normal; \n\n\n\n}\n\n.title {\n   font-family:"Gill Sans MT Ext Condensed Bold", fantasy;\n   font-style:; \n   font-variant:; \n   font-weight:bold; \n   font-size:25px;\n   color: red;\n   text-align: center;\n   margin: 0;\n}\n\n\n.nav3 {\n    \n    height: auto;\n    width: 200px;\n    float: left;\n    padding: 1px;\n    font-family: Arial, Helvetica, sans-serif;\n    margin: 0;\n\n    \n}\n\n\n#icons{\n    display:inline-block;\n    wid

In [13]:
a = None
a = FoodLabel()
dicto = a.calculate()

In [18]:
dicto['items']

Unnamed: 0,_products,weight,lipid,saturated,sacharides,sugar,protein,salt,desc,n_lipid,n_saturated,n_sacharides,n_sugar,n_proteins,n_salt,desc_from_library
0,Francúzska bageta,125,3.0,0.5,50.0,29.0,9.97,0.7,"50% repkový olej, pitná voda, 4,8% pasterizova...",3.75,0.625,62.5,36.25,12.4625,0.875,"pšeničná múka, pitná voda, repkový olej, drožd..."
1,Maslová Majoneza,35,66.0,5.7,32.0,1.4,0.75,0.45,"pasterizované mlieko, jedlá soľ, syridlo, mlie...",23.1,1.995,11.2,0.49,0.2625,0.1575,No_match
2,Syr Udený,36,25.7,16.7,0.9,0.6,26.0,1.4,"pšeničná múka, pitná voda, repkový olej, drožd...",9.252,6.012,0.324,0.216,9.36,0.504,No_match
3,Kápia sterilizovaná 1,20,0.2,0.0,6.1,4.8,0.8,1.0,"pšeničná múka, pitná voda, repkový olej, drožd...",0.04,0.0,1.22,0.96,0.16,0.2,"paprika, pitná voda, kvasný ocot liehový, jedl..."


In [19]:
dicto['dict_compare']

{'Francúzska bageta': 'pšeničná múka, pitná voda, repkový olej, droždie, regulátor kyslosti: E263, múku upravujúca látka: E300, E920, jedlá soľ s\xa0jódom, cukor',
 'Kápia sterilizovaná 1': 'paprika, pitná voda, kvasný ocot liehový, jedlá soľ, cibuľa, horčicové semeno, stabilizátor: chlorid vápenatý, koreniaci výťažok, sladidlo: sacharín'}

In [None]:
 assign_description = lambda x: _dict_compare[x] if x in _dict_compare else 'No_match'

In [26]:
process.extractOne("Kapia", dicto['dict_compare'])[2] 

'Kápia sterilizovaná 1'