In [1]:
using DataFrames, DataFramesMeta, LinearAlgebra, Distributions, Optim, Revise, Plots, CSV, Serialization

In [2]:
# Root directory of the project data. Defaults to the original shared-storage
# location; set the FACTRACT_DATADIR environment variable to point the notebook
# at a different copy of the data without editing this cell.
const datadir = get(ENV, "FACTRACT_DATADIR", "/export/storage_adgandhi/MiscLi/factract");

In [3]:
# Put this notebook's directory on LOAD_PATH (once) so that `using BLPmodule`
# can find the module, then bind the short alias `m` to it.
if !(string(@__DIR__) in LOAD_PATH)
    push!(LOAD_PATH, @__DIR__)
end
using BLPmodule
const m = BLPmodule;

## data

In [4]:
# `test1state = true` loads the `_FL` extract; `false` loads the full file.
test1state = true
data = let csvpath = test1state ? "$datadir/analysis/factract_FL.csv" :
                                  "$datadir/analysis/factract.csv"
    DataFrame(CSV.File(csvpath))
end;

In [5]:
# data[data.tractid .== unique(data.tractid)[1], Cols(r"id", "restot", "pop65plus_int", "pop65_j", "fracpop_inmkt", "mktpop")];

In [6]:
# Long-format table: one row per facility-tract combination within 10km.
# Columns are renamed to short model names; distance is rescaled by 1/100 and
# its square is added as `d2`.
# println(names(data))
df = @select data begin
    :t = :tractid
    :j = :facid
    :q = :restot
    :M = :mktpop
    :d = :dist ./ 100
    :d2 = (:dist ./ 100) .^ 2
    :x1 = :dchrppd
    :x2 = :rnhrppd
end;
# Order rows by facility id before previewing.
sort!(df, :j)
first(df, 3)

Unnamed: 0_level_0,t,j,q,M,d,d2,x1,x2
Unnamed: 0_level_1,Int64,String7,Int64,Float64,Float64,Float64,Float64,Float64
1,12069030105,105001,80,194.1,0.085943,0.00738619,4.269,0.285
2,12069030203,105001,80,583.0,0.0539458,0.00291015,4.269,0.285
3,12069030204,105001,80,178.4,0.0128454,0.000165004,4.269,0.285


In [7]:
# Facility-level table: one row per `j`. `q`, `x1` and `x2` are taken from the
# first row of each facility group; `totalM` sums `M` over the facility's rows.
dfj = @combine groupby(df, :j) begin
    :j = first(:j)
    :q = first(:q)
    :x1 = first(:x1)
    :x2 = first(:x2)
    :totalM = sum(:M)
end
# Same facility ordering as `df`.
sort!(dfj, :j)
first(dfj, 3)

Unnamed: 0_level_0,j,q,x1,x2,totalM
Unnamed: 0_level_1,String7,Int64,Float64,Float64,Float64
1,105001,80,4.269,0.285,4428.1
2,105005,81,4.327,0.839,14278.2
3,105009,54,3.948,0.405,7710.4


---

In [15]:
using Random  # stdlib; provides the seeded generator used for ζ below

nI = 100; # number of random-coefficient draws (columns of ζ)
K = 2;    # number of characteristics carrying a random coefficient (rows of ζ)
D = hcat(df.d, df.d2);  # distance and distance squared, one row per facility-tract pair
X = hcat(df.x1, df.x2); # facility characteristics, one row per facility-tract pair

# Random-coefficient draws. These feed `m.compute_deltas` / `m.q` in later cells,
# so they are drawn from an explicitly seeded generator: re-running the notebook
# then reproduces the same ζ and therefore the same `deltas`.
ζ = rand(MersenneTwister(20220401), Normal(0, 1), K, nI); #RC

# disutility of distance
β = [-0.5, -1];
T = df.t;            # tract id for each row of df
T_set = unique(T);
J = df.j;            # facility id for each row of df
J_set = unique(J);
q = dfj.q; #one per facility
M = df.M;

In [16]:
# Re-import the local module and re-bind its short alias.
using BLPmodule
const m = BLPmodule;

In [22]:
# Compute `deltas` from facility quantities `q`, the row-level inputs (D, M, J, T),
# the draws ζ and the distance coefficients β. The saved output below has one
# entry per facility.
deltas = m.compute_deltas(
    q, D, M, ζ, β,
    J, J_set, T, T_set,
);

394-element Vector{Float64}:
 -3.911959299091234
 -5.064892392521669
 -4.819856859211641
 -4.710411577243193
 -4.180080761210304
 -0.10236261505165167
 -3.48678276642618
 -3.5827049034108613
 -2.863070610111901
 -5.301823768149701
  ⋮
 -3.4996620669818777
 -3.1935651808396583
 -3.378678382647972
 -0.5174151200787867
 -0.7759905114065155
 -4.374358116633607
 -3.8483235045133215
 -3.9405503096011945
 -4.024411871632947

In [30]:
# just describing the deltas
# describe(DataFrame(deltas = deltas), :mean, :min, :q25, :median, :q75, :max)

In [31]:
# Quantities returned by the model at the computed `deltas`, using the same
# inputs that were passed to `m.compute_deltas`.
impliedq = m.q(
    deltas, D, M, ζ, β,
    J, J_set, T, T_set,
)

394-element Vector{Float64}:
  79.99999999999999
  81.00000000000003
  54.0
  54.99999999999999
 121.00000000000009
 146.99999999999997
 158.00000000000006
  98.99999999999996
 138.99999999999994
  44.99999999999997
   ⋮
 144.0
  93.00000000000007
 129.99999999999997
  59.00000000000003
  76.0
  51.99999999999998
 250.99999999999994
  85.99999999999997
  92.00000000000004

In [32]:
# Check facility by facility that `impliedq` matches the observed `dfj.q`.
# `isapprox` is broadcast so each element is compared with its own tolerance;
# the un-dotted call on two vectors is a single norm-based comparison that
# returns one Bool, which made the surrounding `all` a no-op.
all(isapprox.(dfj.q, impliedq))

true

In [183]:
# Summary statistics for distance, facility quantity and tract market size.
describe(
    df, :mean, :min, :q25, :median, :q75, :max;
    cols=[:d, :q, :M],
)

Unnamed: 0_level_0,variable,mean,min,q25,median,q75,max
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Float64,Float64,Real
1,d,0.061227,0.000182547,0.0419854,0.0639865,0.0824626,0.0999968
2,q,113.566,20.0,92.0,109.0,118.0,276.0
3,M,99.2366,0.2,52.3,79.4,124.1,3030.5


In [190]:
# Facility-level row(s) for the facility under investigation.
# NOTE(review): `problem_j` is not defined in the cells shown here; the saved
# output corresponds to facility 105140 — confirm where it is set.
let rowmask = dfj.j .== problem_j
    dfj[rowmask, :]
end

Unnamed: 0_level_0,j,q,x1,x2,totalM
Unnamed: 0_level_1,String7,Int64,Float64,Float64,Float64
1,105140,225,4.008,0.454,4601.4


In [None]:
# Same summary as for the full table, restricted to the rows of `problem_j`.
let facrows = df[J .== problem_j, :]
    describe(
        facrows, :mean, :min, :q25, :median, :q75, :max;
        cols=[:d, :q, :M],
    )
end

Unnamed: 0_level_0,variable,mean,min,q25,median,q75,max
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Float64,Float64,Real
1,d,0.0623871,0.00509142,0.0428195,0.069729,0.0851876,0.0997734
2,q,225.0,225.0,225.0,225.0,225.0,225.0
3,M,50.5648,1.4,29.95,48.4,66.0,117.2


In [138]:
# Shorter summary of the same three columns.
# NOTE(review): the saved output under this cell (max d ≈ 0.2) does not match the
# In [183] summary (max d ≈ 0.1), so it appears to come from a different data
# extract — re-run before relying on it.
describe(
    df, :mean, :median, :min, :max;
    cols=[:d, :q, :M],
)

Unnamed: 0_level_0,variable,mean,median,min,max
Unnamed: 0_level_1,Symbol,Float64,Float64,Real,Real
1,d,0.115343,0.118155,0.000182547,0.199999
2,q,113.293,109.0,20.0,276.0
3,M,104.066,82.6,0.2,3030.5


In [28]:
# Share of facilities whose implied quantity matches the observed one, and the
# mean absolute gap, over the entries not flagged by `nanind`.
# NOTE(review): `nanind` is not defined in the cells shown here — confirm its source.
let ok = .!nanind
    println(mean(isapprox.(impliedq[ok], dfj.q[ok])))
    println(mean(abs.(impliedq[ok] .- dfj.q[ok])))
end

In [201]:
# Distribution of the absolute gap between implied and observed quantities,
# over the entries not flagged by `nanind`.
let ok = .!nanind
    describe(DataFrame(diff = abs.(impliedq[ok] .- dfj.q[ok])))
end

Unnamed: 0_level_0,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,diff,17.2455,0.0,8.52651e-14,191.399,0,Float64


In [94]:
# Persist the results to the current working directory for the next notebook.
for (path, obj) in ("deltas.jls" => deltas, "df.jls" => df, "dfj.jls" => dfj)
    serialize(path, obj)
end
#continued in ./gmm.ipynb