In [1]:
#import numpy as np
import pandas as pd
from datetime import datetime as dt
import time
import prediction
import math
import datetime

In [2]:
def current_milli_time():
    """Return the current Unix time in milliseconds, rounded to an integer."""
    seconds_since_epoch = time.time()
    return round(seconds_since_epoch * 1000)

In [3]:
def create_collection_point_dataframes(data):
    """Split ``data`` by collection point and register each part as a global.

    For every distinct value of the ``übergabestellennummer`` (collection
    point number) column, the matching rows are stored in this module's
    global namespace under a generated name of the form
    ``<millis>_orders_collection_point_<collection point>``. The names start
    with a digit, so they are not valid identifiers; retrieve a frame with
    ``globals()[name]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Orders; must contain an ``übergabestellennummer`` column.

    Returns
    -------
    list of str
        Names of the globally registered dataframes, ordered by ascending
        collection point number. Empty if ``data`` has no rows.

    Notes
    -----
    Every call adds new entries to ``globals()`` and nothing here removes
    them, so repeated calls keep earlier frames alive.
    """
    collection_points = sorted(data["übergabestellennummer"].unique())

    # Take the timestamp once so every frame created by this call shares the
    # same prefix, even when the loop runs across a millisecond boundary.
    name_prefix = f"{current_milli_time()}_orders_collection_point_"

    collection_point_df_name_list = []
    for cp in collection_points:
        # str() handles NumPy scalars and plain Python values alike; the
        # previous cp.astype(str) failed for object-dtype (e.g. string) ids.
        dataframe_name = name_prefix + str(cp)
        globals()[dataframe_name] = data.query("übergabestellennummer == @cp")
        collection_point_df_name_list.append(dataframe_name)
    return collection_point_df_name_list

In [4]:
def predict_and_return_error_list(cp_list, debug=False, debug_sample_size=10):
    """Compute a prediction error for each globally registered dataframe.

    Each name in ``cp_list`` is looked up in this module's ``globals()``.
    A frame is skipped when it, or the training part returned by
    ``prediction.train_test_split``, is empty after
    ``prediction.filter_for_min3_pick_ups_and_initial_deliveries``. A
    ``ValueError`` raised by ``prediction.calculate_error`` is caught, a
    message is printed and the frame is skipped.

    Parameters
    ----------
    cp_list : iterable of str
        Names of dataframes stored in ``globals()``.
    debug : bool, default False
        When true, print the row count of every frame and stop once
        ``debug_sample_size`` errors have been computed. The flag is also
        forwarded to ``prediction.train_test_split`` and
        ``prediction.calculate_error``.
    debug_sample_size : int, default 10
        Number of successfully evaluated frames after which the loop stops
        in debug mode; ignored otherwise.

    Returns
    -------
    list of [str, object]
        One ``[df_name, error]`` pair per evaluated frame, in input order.
    """
    results = []
    evaluated = 0

    for df_name in cp_list:
        if debug and evaluated >= debug_sample_size:
            break

        frame = globals()[df_name]
        if debug:
            print(f"df_name: {df_name} has {frame.shape[0]} rows")

        # Frames with too few orders are dropped: no time range between
        # collections can be calculated for them.
        # NOTE(review): the helper name suggests a minimum of three pick-ups — confirm.
        frame = prediction.filter_for_min3_pick_ups_and_initial_deliveries(frame)
        if frame.shape[0] <= 0:
            continue

        x_train, y = prediction.train_test_split(frame, debug)
        x_train = prediction.filter_for_min3_pick_ups_and_initial_deliveries(x_train)
        if x_train.shape[0] <= 0:
            continue

        try:
            error = prediction.calculate_error(x_train, y, debug)
        except ValueError:
            print("skipping because only calculate error if there is a prediction", )
        else:
            results.append([df_name, error])
            evaluated += 1

    return results

In [5]:
# Load the orders frame from a pickle file; the path is relative to the
# notebook's working directory.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
orders = pd.read_pickle("../data/battery_cleaned_with_geo.pkl")

In [None]:
# Show the column labels of the orders frame.
orders.columns
# redacted

In [None]:
# Spot-check the rows of a single collection point in `orders`.
# NOTE(review): the id is compared as a string here, while a later cell
# compares the same column of `orders_grs` against the integer 105759 —
# confirm the column dtype at each stage.
orders.query("übergabestellennummer == '105759'")
# redacted

In [None]:
# Pass the orders through prediction.filter_for_report, then split the result
# into two frames with prediction.filter_dataframe_for_prediction.
# NOTE(review): the names suggest completed vs. open orders — confirm against
# the prediction module.
# `orders` is rebound here, so re-running this cell feeds the already
# filtered frame into filter_for_report again.
orders = prediction.filter_for_report(orders)
orders_comp, orders_open = prediction.filter_dataframe_for_prediction(orders)
# redacted

In [None]:
# Split the orders in `orders_comp` by contract number (vertragsnummer):
# 'GRS Online' and 'Fahrradbatterien' (bicycle batteries).
orders_grs = orders_comp.query("vertragsnummer == 'GRS Online'")
orders_bikes = orders_comp.query("vertragsnummer == 'Fahrradbatterien'")

# Report how many orders fall under each contract.
print(f"GRS Online orders: {orders_grs.shape[0]}")
print(f"Fahrradbatterien orders: {orders_bikes.shape[0]}")

# redacted


### GRS

In [10]:

# Apply prediction.remove_orders_with_unknown_weights, then sort ascending by
# company group, collection point and order date.
orders_grs = (
    prediction.remove_orders_with_unknown_weights(orders_grs)
    .sort_values(by=['konzernnummer', 'übergabestellennummer', 'abholdatum'], ascending=True)
)

In [None]:
# Spot-check the rows of a single collection point in the GRS orders.
# NOTE(review): the id is compared as an integer here, while an earlier cell
# compares the same column of `orders` against the string '105759' — confirm
# which dtype is correct.
orders_grs.query("übergabestellennummer == 105759")
# redacted

In [12]:
# Register one dataframe per GRS collection point in globals() and keep the
# generated names for later lookup.
grs_collection_point_list = create_collection_point_dataframes(orders_grs)

In [None]:
# Compute [df_name, error] pairs for the registered GRS frames. With
# debug=False, predict_and_return_error_list prints no per-frame row counts
# and applies no sample-size cap.
error_list = predict_and_return_error_list(grs_collection_point_list, debug=False)
# redacted

In [None]:
# Hand the GRS [df_name, error] pairs to prediction.print_prediction_metrics.
prediction.print_prediction_metrics(error_list)
# redacted

### Fahrradbatterien

In [None]:

# Same steps as for the GRS orders, applied to the Fahrradbatterien
# (bicycle battery) orders: clean and sort, split per collection point,
# compute the errors and pass them to the metrics helper.
orders_bikes = (
    prediction.remove_orders_with_unknown_weights(orders_bikes)
    .sort_values(by=['konzernnummer', 'übergabestellennummer', 'abholdatum'], ascending=True)
)

bikes_cp_list = create_collection_point_dataframes(orders_bikes)
bikes_error_list = predict_and_return_error_list(bikes_cp_list, debug=False)

prediction.print_prediction_metrics(bikes_error_list)

# redacted