In [1]:
import os
import sys

# Project root: use the shell's PWD when exported, otherwise fall back to the
# process working directory (os.chdir(None) would raise TypeError).
PWD = os.getenv('PWD') or os.getcwd()
os.chdir(PWD)
sys.path.insert(0, PWD)

# DJANGO_SETTINGS_MODULE must be a dotted Python module path, not a file name,
# so the ".py" suffix is dropped. setdefault keeps any value already exported
# by the environment that launched the kernel.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "local_settings")
import django
django.setup()

from datetime import datetime, timedelta
from pandas import DataFrame, to_datetime, set_option
from TpsParse.Tps.TpsFile import TpsFile
import os
from numpy import quantile, nan
from math import ceil

# Widen the notebook display limits. Full option names are used because
# abbreviated names are resolved by pattern matching and can become ambiguous
# if pandas adds similarly named options.
set_option('display.max_columns', 500)
set_option('display.max_rows', 1000)

from bokeh.resources import CDN
from bokeh.embed import components
from bokeh.models import Range1d, NumeralTickFormatter, ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.transform import linear_cmap, factor_cmap
from bokeh.io import show, output_notebook
from bokeh.palettes import Category20_20

from django.db.models import Sum, DateField
from django.db.models.functions import Trunc
from Utils.clean_string import clean_string

# Configure Bokeh to render figures inline in the notebook output cells.
output_notebook()

In [2]:
from pprint import pprint

In [3]:
from app.models import Acumula, Imputaci

In [4]:
def parser(path):
    """Read a TPS file and return its records as a cleaned DataFrame.

    Column names are taken from the field names of every table definition
    (the part after the first ':' , lower-cased); a name is registered only
    the first time it is seen. String values are stripped and title-cased.
    Columns whose name starts with 'fec' or 'periodo' and whose field type is
    'SignedLong' are converted to dates, interpreting the stored integer as a
    number of days after 1800-12-28.

    Args:
        path: Path of the TPS file to read.

    Returns:
        DataFrame with empty strings replaced by NaN, all-null rows removed,
        duplicate rows removed (last occurrence kept) and columns dropped
        when they are entirely null or entirely zero.
    """
    columns = []
    cols_and_type = []
    records = []

    # The context manager guarantees the file handle is released even when
    # parsing fails; everything that reads from the file happens inside it.
    with open(path, "r+b") as file:
        tps = TpsFile(file)
        for definition in tps.get_table_definitions():
            for field in definition.fields:
                column = field.field_name.split(':')[1].lower()
                if column not in columns:
                    columns.append(column)
                    cols_and_type.append((column, field.type))
            for record in tps.get_data_records(definition):
                records.append([
                    value.strip().title() if isinstance(value, str) else value
                    for value in record.values
                ])

    # One dict per record, pairing values with column names by position.
    table = [dict(zip(columns, record)) for record in records]
    df = DataFrame(table, columns=columns)

    # Format the date columns.
    name_list = ('fec', 'periodo')
    date_columns = [
        cname for cname, ctype in cols_and_type
        if cname.startswith(name_list) and ctype == 'SignedLong'
    ]
    for col in date_columns:
        df[col] = datetime(1800, 12, 28) + df[col].map(timedelta)
        # A stored offset of 0 lands exactly on the base date: treat as missing.
        df.loc[df[col] == '1800-12-28', col] = None

    # Replace empty fields with null values.
    df.replace(to_replace='', value=nan, inplace=True)
    # NOTE(review): the effect of an explicit value=None has changed across
    # pandas releases (older ones fell back to pad-filling) -- confirm the
    # behaviour against the installed pandas version.
    df.replace(to_replace=0, value=None, inplace=True)

    # Drop the records whose values are all null.
    df.dropna(axis='index', how='all', inplace=True)

    # Remove duplicated records.
    df.drop_duplicates(keep='last', inplace=True)

    # Drop the columns whose values are all null or all zero.
    df.dropna(axis='columns', how='all', inplace=True)
    df = df.loc[:, (df != 0).any(axis=0)]

    return df

In [8]:
# Django refuses synchronous ORM calls when it detects a running event loop
# (a later cell's recorded output shows the resulting SynchronousOnlyOperation).
# This flag disables that safety check so queries can run from notebook cells.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

In [17]:
# Fetch only the three fields used below, as one dict per Imputaci row.
imputaci = Imputaci.objects.values(
    'descripcion',
    'donde_acumula__descripcion',
    'total_egresos',
)
# Optional filter, currently disabled:
#     .filter(
#     donde_acumula__descripcion='Insumos Tambo',
#             total_egresos__gt=0
# )

# Build the frame and give the related-field lookup a readable column name.
df = DataFrame(imputaci).rename(
    columns={'donde_acumula__descripcion': 'imputacion_acumulada'},
)

# Disabled breakdown per (accumulated imputation, description):
# gb_imputaci = DataFrame(df.groupby(
#     ['donde_acumula__descripcion','descripcion']).total_egresos.sum())
# gb_imputaci.sort_values('total_egresos', ascending=False, inplace=True)

In [22]:
# Display the frame built from the Imputaci query.
df

Unnamed: 0,descripcion,imputacion_acumulada,total_egresos
0,Imp Ley 25413 Afip,Impuestos,0.0
1,Comision Transferencia,Impuestos,0.0
2,Franqueo,Impuestos,0.0
3,Comision Mant. De Cuenta,Impuestos,0.0
4,Comision Resumen,Impuestos,0.0
5,Comision Canje O/Bancos,Impuestos,0.0
6,Comision Gastos Chequera,Impuestos,0.0
7,Intereses Bancarios,Intereses,0.0
8,Semilla De Maiz,Insumos Agricultura,70695.0
9,Servicios De Saco De Grano,Servicios Agrop Agricultura,33862.8


In [5]:
from app.utils import get_imputaci

# Load the imputation rows through the project helper and rename the
# related-field lookup column. The recorded run of this cell raised
# SynchronousOnlyOperation (see the output below).
imputaci = get_imputaci()
df = DataFrame(imputaci).rename(
    columns={'donde_acumula__descripcion': 'imputacion_acumulada'},
)

SynchronousOnlyOperation: You cannot call this from an async context - use a thread or sync_to_async.

In [22]:
# Columns to total per accumulated imputation. The string "sum" selects
# pandas' own groupby sum; passing the builtin `sum` is deprecated in recent
# pandas releases and emits a FutureWarning.
cols = dict.fromkeys(
    [
        'compra_cta_cte',
        'compra_contado',
        'nota_credito_recibida',
        'nota_debito_recibida',
        'egresos_cta_cte',
        'egresos_contado',
        'venta_cta_cte',
        'venta_contado',
        'nota_credito_emitida',
        'nota_debito_emitida',
        'cobranzas',
        'total_egresos',
    ],
    'sum',
)
# as_index=False keeps 'imputacion_acumulada' as a regular column; agg already
# returns a DataFrame, so no extra DataFrame(...) wrapper is needed.
gb = df.groupby('imputacion_acumulada', as_index=False).agg(cols)

In [23]:
# Display the per-imputation totals computed by the groupby above.
gb

Unnamed: 0,imputacion_acumulada,compra_cta_cte,compra_contado,nota_credito_recibida,nota_debito_recibida,egresos_cta_cte,egresos_contado,venta_cta_cte,venta_contado,nota_credito_emitida,nota_debito_emitida,cobranzas,total_egresos
0,Administracion,0.0,0.0,0.0,0.0,22394800.0,0.0,330440.0,0.0,0.0,0.0,22045700.0,0.0
1,Alquiler Rural,1845325.0,0.0,0.0,0.0,0.0,0.0,115996.0,0.0,0.0,0.0,0.0,1145077.0
2,Combustible,203688.64,1143.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165549.75
3,Compra De Granos,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Compra De Inmuebles,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Compra Iva,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Creditos,1368187.0,146074.0,0.0,0.0,0.0,425548.0,0.0,0.0,0.0,0.0,0.0,661811.8
7,Encomiendas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ganaderia,99600.0,0.0,0.0,0.0,0.0,7000.0,0.0,0.0,0.0,0.0,0.0,30180.0
9,Gastos Varios,8787.65,6250.27,0.0,0.0,0.0,45407.6,0.0,0.0,0.0,0.0,0.0,15275.56


In [43]:
# NOTE(review): in the cells visible here, gb_imputaci is only assigned inside
# commented-out code, so this cell relies on leftover kernel state and would
# raise NameError after Restart Kernel -> Run All. Confirm and restore the
# definition if this output is still needed.
gb_imputaci

Unnamed: 0_level_0,Unnamed: 1_level_0,total_egresos
donde_acumula__descripcion,descripcion,Unnamed: 2_level_1
Alquiler Rural,Alquiler Campo Agricultura,184760.0
Alquiler Rural,Alquiler Campo Ganaderia,324771.0
Alquiler Rural,Alquiler Campo Tambo,635546.0
Combustible,Aceites,11450.7
Combustible,Gas,2551.59
Combustible,Gas Oil,129562.0
Combustible,Kerosene,110.46
Combustible,Nafta,21875.0
Creditos,Creditos,615152.0
Creditos,Intereses Creditos,46659.8
