In [1]:
using DataFrames, DataFramesMeta, LinearAlgebra, Distributions, Optim, Revise, Plots, CSV, Serialization

In [2]:
# Root directory of the factract data. Defaults to the shared storage location;
# set the FACTRACT_DATADIR environment variable to run against a copy elsewhere.
const datadir = get(ENV, "FACTRACT_DATADIR", "/export/storage_adgandhi/MiscLi/factract");

In [18]:
# Make the notebook's own directory importable (once), then load the local
# BLPmodule and bind the short alias `m` used in the cells below.
if !(string(@__DIR__) in LOAD_PATH)
    push!(LOAD_PATH, @__DIR__)
end
using BLPmodule
const m = BLPmodule;

## data

In [4]:
# Toggle between the `_FL` extract (true) and the full factract file (false).
test1state = true
data = DataFrame(CSV.File(
    test1state ? "$datadir/analysis/factract_FL.csv" :
                 "$datadir/analysis/factract.csv",
));

In [5]:
# data[data.tractid .== unique(data.tractid)[1], Cols(r"id", "restot", "pop65plus_int", "pop65_j", "fracpop_inmkt", "mktpop")];

In [6]:
# The big dataframe: one row for every fac-tract combination within 10km.
# Source columns are renamed to short model names; `dist` is divided by 100 to
# give `d`, and `d2` is the square of that rescaled distance.
# println(names(data))
df = select(
    data,
    :tractid => :t,
    :facid => :j,
    :restot => :q,
    :mktpop => :M,
    :dist => (dist -> dist ./ 100) => :d,
    :dist => (dist -> (dist ./ 100) .^ 2) => :d2,
    :dchrppd => :x1,
    :rnhrppd => :x2,
);
# Order rows by facility ID.
sort!(df, :j)
first(df, 3)

Unnamed: 0_level_0,t,j,q,M,d,d2,x1,x2
Unnamed: 0_level_1,Int64,String7,Int64,Float64,Float64,Float64,Float64,Float64
1,12069030105,105001,80,194.1,0.085943,0.00738619,4.269,0.285
2,12069030203,105001,80,583.0,0.0539458,0.00291015,4.269,0.285
3,12069030204,105001,80,178.4,0.0128454,0.000165004,4.269,0.285


In [7]:
# Facility-level dataframe: one row per facility `j`. `q`, `x1` and `x2` are
# taken from the first row of each facility's group; `totalM` sums `M` over all
# of the facility's rows.
dfj = combine(
    groupby(df, :j),
    :j => first => :j,
    :q => first => :q,
    :x1 => first => :x1,
    :x2 => first => :x2,
    :M => sum => :totalM,
)
sort!(dfj, :j)
first(dfj, 3)

Unnamed: 0_level_0,j,q,x1,x2,totalM
Unnamed: 0_level_1,String7,Int64,Float64,Float64,Float64
1,105001,80,4.269,0.285,4428.1
2,105005,81,4.327,0.839,14278.2
3,105009,54,3.948,0.405,7710.4


---

In [8]:
# Data: plain arrays pulled out of the two dataframes.
D = [df.d df.d2];   # distances: columns are d and d2
X = [df.x1 df.x2];  # columns are x1 and x2

T = df[!, :t];   # tract IDs
J = df[!, :j];   # facility IDs
M = df[!, :M];   # tract populations
q = dfj[!, :q];  # one quantity per facility

In [9]:
# Parameters
using Random  # stdlib; provides the seeded RNG for the random-coefficient draws

nI = 300; # number of random coefs to draw
K = 2; # number of non-linear (distance) characteristics
# Draw from a fixed-seed RNG: ζ and the deltas computed from it are serialized
# at the end of this notebook, so re-running must reproduce the same draws.
ζ = rand(MersenneTwister(1234), Normal(0, 1), K, nI); # RC draws, K × nI
# disutility of distance
β = [-0.5, -1];

In [17]:
# Compute `deltas` with BLPmodule's `compute_deltas`; `@time` reports the runtime
# and allocations of the call.
deltas = @time m.compute_deltas(q, D, M, ζ, β, J, T);

iterations: 349
dist: 9.843108550455781e-10
 79.466349 seconds (91.25 M allocations: 126.679 GiB, 6.39% gc time)


In [11]:
# Quantities returned by BLPmodule's `compute_quantities` at the computed
# `deltas` (presumably the model-implied per-facility quantities — see BLPmodule).
impliedq = m.compute_quantities(deltas, D, M, ζ, β, J, T)

394-element Vector{Float64}:
  79.99999999999999
  81.00000000000004
  53.99999999999997
  55.000000000000014
 121.00000000000006
 146.99999999999994
 158.00000000000006
  99.00000000000003
 138.99999999999997
  44.999999999999964
   ⋮
 144.00000000000006
  93.00000000000001
 129.99999999999997
  59.00000000000003
  76.00000000000003
  52.00000000000004
 250.9999999999998
  85.99999999999996
  92.00000000000004

In [12]:
# Check convergence facility by facility. `isapprox(q, impliedq)` on whole
# vectors is a single norm-based comparison, so a poorly matched small facility
# can hide behind large ones; broadcasting tests each element on its own scale.
all(isapprox.(q, impliedq))

true

In [13]:
# Write the results to the current working directory, one file per object.
for (path, obj) in (
    "deltas.jls" => deltas,
    "df.jls" => df,
    "dfj.jls" => dfj,
    "zeta.jls" => ζ,
)
    serialize(path, obj)
end
#continued in ./gmm.ipynb
;