Create dummy sales data for Eniac based on:
1. Mean sales price of one item
2. Revenue

The generated data will be used to compare the distribution of prices for tech items sold by the two companies (Eniac and Magist).

In [None]:
import numpy as np
from scipy.stats import truncnorm
import pandas as pd

def make_distribution(mean, std, low, upp):
    """Return a frozen normal distribution truncated to ``[low, upp]``.

    ``scipy.stats.truncnorm`` takes its clip points in units of standard
    deviations relative to the mean, so the absolute bounds are standardized
    before being handed over.

    Args:
        mean: Mean of the underlying (untruncated) normal distribution.
        std: Standard deviation of the underlying normal; must be positive.
        low: Lower bound of the support, on the same scale as ``mean``.
        upp: Upper bound of the support; must be greater than ``low``.

    Returns:
        A frozen ``truncnorm`` distribution (exposes ``rvs``, ``cdf``, ...).

    Raises:
        ValueError: If ``std`` is not positive or ``low`` is not below ``upp``.
    """
    # "not all(> 0)" rather than "any(<= 0)" so that NaN is rejected as well.
    if not np.all(np.asarray(std) > 0):
        raise ValueError(f"std must be positive, got {std!r}")
    if not np.all(np.asarray(low) < np.asarray(upp)):
        raise ValueError(f"low must be less than upp, got low={low!r}, upp={upp!r}")

    # Standardized clip points expected by truncnorm.
    a = (low - mean) / std
    b = (upp - mean) / std
    return truncnorm(a, b, loc=mean, scale=std)

# --- Inputs taken from the company report ---
mean = 540  # average item price
lower_b = 0
upper_b = 7000
# The report gives an annual revenue of 14,000,000€; the amount is doubled
# here so that it matches the timeframe of the Magist data.
rev = 28_000_000
area_under_curve = 0.99  # not referenced by the code visible in this notebook

std_dev = 100  # not from the report -- a reasonable guess
dist = make_distribution(mean=mean, std=std_dev, low=lower_b, upp=upper_b)

# Draw a fresh sample of item prices on every pass, 500 items larger than the
# previous one, until the prices add up to at least the target revenue.
# samp_size is bumped after each draw, so it ends 500 above len(data).
samp_size = 5000
data_sum = 0
while data_sum < rev:
    data = dist.rvs(samp_size)
    data_sum = data.sum()
    samp_size += 500

# Cell output: total revenue represented by the final sample.
data.sum()

28146449.638408914

To fit into the Magist database as read by Tableau, the generated data needs a field through which it can be related to the Magist tables.
Therefore, each generated sale is assigned a random date within the same timeframe as the Magist records.
The dates will not be used in the analysis itself, but they allow the two data sets to be visualized together.

In [42]:
from datetime import datetime, timedelta
import random
 
# Bounds of the date range from which the order dates are drawn.
test_date1, test_date2 = datetime(2016, 9, 5, 12, 0, 0), datetime(2018, 8, 29, 12, 0, 0)

# One date is needed per generated price.
K = len(data)

# Whole days between the two bounds.
dates_bet = test_date2 - test_date1
total_days = dates_bet.days

# Pick a random day offset for every sale. randrange() excludes its stop
# value, so offsets run from 0 to total_days - 1 and test_date2 itself is
# never drawn. The generator keeps its default seeding: reseeding it before
# every single draw only costs time.
res = [
    test_date1 + timedelta(days=random.randrange(total_days))
    for _ in range(K)
]
 

In [50]:
# reviewing constructed data set
import pandas as pd
data_df = pd.DataFrame({'Price': data, 'Order_date': res})
# pd.to_datetime returns a new Series instead of converting in place, so the
# result has to be assigned back for the conversion to take effect.
data_df["Order_date"] = pd.to_datetime(data_df["Order_date"])
data_df.sample(10)

Unnamed: 0,Price,Order_date
43288,446.487597,2017-10-05 12:00:00
37050,630.653366,2017-05-13 12:00:00
10333,553.750267,2018-05-10 12:00:00
10732,443.979274,2016-12-30 12:00:00
47081,418.984278,2016-10-11 12:00:00
16481,442.025763,2017-05-09 12:00:00
23727,552.639008,2018-04-28 12:00:00
41396,636.179135,2017-09-05 12:00:00
51738,581.230977,2017-03-27 12:00:00
33961,477.065472,2018-03-17 12:00:00


In [None]:
# summary statistics of the generated data set, as a quick sanity check
data_df.describe()

Unnamed: 0,Price
count,52000.0
mean,541.277878
std,99.564348
min,108.98407
25%,474.030045
50%,541.253592
75%,609.134796
max,975.511077


In [52]:
# Write the generated sales to a CSV file; the DataFrame index is left out.
csv_path = "./eniac_rev.csv"
data_df.to_csv(csv_path, index=False)