# Collecting experiment data in a data frame

In [None]:
using DataFrames
using Statistics
using PyPlot
using Random
using Pipe
using Test

In [None]:
"""
    sim_e([rng::AbstractRNG])

Draw independent U(0,1) samples from `rng` until their running sum exceeds 1.0.

Return the `Vector{Float64}` of all draws, including the final draw that pushed the
sum above 1.0. The length of the result answers "how many draws until the sum
exceeds 1?".

When `rng` is omitted the task-local default RNG is used, so seeding it with
`Random.seed!` makes the result reproducible.
"""
function sim_e(rng::AbstractRNG = Random.default_rng())
    draws = Float64[]
    total = 0.0
    while total <= 1.0
        x = rand(rng)
        push!(draws, x)
        total += x  # running sum: avoids re-summing the whole vector every iteration
    end
    return draws
end

In [None]:
# Seed the global RNG so that the simulated draws are the same on every run.
Random.seed!(1234)

In [None]:
# Run the experiment five times; each element is the vector of draws of one run.
draws = map(_ -> sim_e(), 1:5)

In [None]:
# Every simulated run must end with a cumulative sum strictly above 1.0.
@test all(run -> sum(run) > 1.0, draws)

In [None]:
# Check that it is the last draw that pushes the sum over 1.0: the partial sum of all
# earlier draws must still be at most 1.0. Summing the prefix directly (rather than
# computing `sum - last`) avoids a spurious failure when the subtraction rounds to 1.0.
@test all(run -> sum(@view run[1:end-1]) <= 1.0, draws)

In [None]:
# Put lots of experiments into a data frame: one row per experiment, with a running
# :id and the vector of draws in :pos.
# Both columns are built in one go instead of pushing 10^6 rows one at a time from a
# global-scope loop; the experiments are still simulated in the same order.
n_experiments = 10^6

df = @time DataFrame(id = 1:n_experiments, pos = [sim_e() for _ in 1:n_experiments]);

In [None]:
# Display the collected experiments (columns :id and :pos).
df

In [None]:
# Without `ByRow`, `length` is applied to the whole :pos column at once, so this form
# produces a :jumps column with the value 1,000,000 (the number of rows) in every row:
# transform(df, :pos => length => :jumps)

# Wrapping the function in `ByRow` tells transform! to apply it row by row, so :jumps
# holds the number of draws of each experiment. The column is added to `df` in place.
transform!(df, :pos => ByRow(length) => :jumps)

In [None]:
# Average number of draws needed per experiment.
mean(df[!, :jumps])

In [None]:
# Alternatively, compute the same average as a one-row summary data frame.
@pipe df |> combine(_, :jumps => mean => :jumps_mean)

In [None]:
# The simulated mean should be close to Euler's number e.
ℯ

In [None]:
# Distribution of the number of jumps: group the experiments by :jumps (sorted) and
# count the rows in each group; the count lands in the :jumps_length column.
jumps_agg = combine(groupby(df, :jumps; sort=true), :jumps => length)

In [None]:
# Compare the empirical distribution with the closed form:
#   :simulation - group counts divided by the total number of experiments
#   :theory     - (n - 1) / n! for n jumps
transform(
    jumps_agg,
    :jumps_length => (counts -> counts / sum(counts)) => :simulation,
    :jumps => ByRow(n -> (n - 1) / factorial(n)) => :theory,
)

In [None]:
# Display the experiments again; `df` now also carries the :jumps column added by
# the in-place transform! above.
df

In [None]:
# First and last draw of every experiment, side by side in a new data frame.
df_test = DataFrame(first = first.(df.pos), last = last.(df.pos))

In [None]:
# Histogram (100 bins) of the first draw of each experiment.
hist(df_test[!, :first]; bins=100);

In [None]:
# Histogram (100 bins) of the last draw of each experiment, the one that crossed 1.0.
hist(df_test[!, :last]; bins=100);