In [101]:
using DataFrames, DataFramesMeta, LinearAlgebra, Distributions, Optim, Revise, Plots, CSV, Serialization
using Random

In [102]:
# Root directory of the project data; later cells read CSV files from "$datadir/analysis".
const datadir = "/export/storage_adgandhi/MiscLi/factract";

In [104]:
# Put this notebook's directory on LOAD_PATH (once) so that `using BLPmodule`
# can resolve the module, then bind the short alias `m` used by later cells.
if !(string(@__DIR__) in LOAD_PATH)
    push!(LOAD_PATH, @__DIR__)
end
using BLPmodule
const m = BLPmodule;

## data

In [105]:
# Toggle between the two input files: `true` reads factract_FL.csv, `false`
# reads factract.csv (presumably a one-state test extract vs. the full data —
# confirm against the data directory).
test1state = true
data = DataFrame(CSV.File(
    test1state ? "$datadir/analysis/factract_FL.csv" : "$datadir/analysis/factract.csv",
));

In [106]:
# List every column name of the loaded table.
names(data)

22-element Vector{String}:
 "facid"
 "tractid"
 "dist"
 "nfacs"
 "county"
 "occpct"
 "totbeds"
 "dchrppd"
 "rnhrppd"
 "avg_dailycensus"
 ⋮
 "state"
 "cz"
 "pop65plus_int"
 "poptot_int"
 "nres_mcare"
 "pop65_j"
 "fracpop_inmkt"
 "mktpop_j"
 "mktshare_j"

In [107]:
# Build the working table with short column names:
#   t  <- tractid         pop <- pop65plus_int   j  <- facid
#   q  <- restot          s   <- mktshare_j      d  <- dist
#   d2 <- dist squared    x1  <- dchrppd         x2 <- rnhrppd
df = select(
    data,
    :tractid => :t,
    :pop65plus_int => :pop,
    :facid => :j,
    :restot => :q,
    :mktshare_j => :s,
    :dist => :d,
    :dist => (dist -> dist .^ 2) => :d2,
    :dchrppd => :x1,
    :rnhrppd => :x2,
);
# Preview the first six rows.
first(df, 6)

Unnamed: 0_level_0,t,pop,j,q,s,d,d2,x1,x2
Unnamed: 0_level_1,Int64,Int64,String7,Int64,Float64,Float64,Float64,Float64,Float64
1,12069031000,2453,105001,80,0.0166521,12.584,158.357,4.269,0.285
2,12069030306,1853,105001,80,0.0166521,7.82126,61.1721,4.269,0.285
3,12069030207,1538,105001,80,0.0166521,4.29014,18.4053,4.269,0.285
4,12069030912,1513,105001,80,0.0166521,5.08819,25.8897,4.269,0.285
5,12069030410,1075,105001,80,0.0166521,18.8484,355.263,4.269,0.285
6,12069030805,874,105001,80,0.0166521,11.4688,131.533,4.269,0.285


In [108]:
# Group the rows of df by facility id `j`.
# NOTE(review): `gdf` is not referenced again in the cells shown here; the
# facility-level cell further down calls `groupby(df, :j)` afresh.
gdf = groupby(df, :j);

In [110]:
# List the column names of the renamed working table.
names(df)

9-element Vector{String}:
 "t"
 "pop"
 "j"
 "q"
 "s"
 "d"
 "d2"
 "x1"
 "x2"

In [119]:
# Disabled column selector kept for reference: it would pick every column
# except those whose name matches "x" or "d".
# cols = Not(Cols(r"x", r"d"));
# Show rows 1 through 5 with all columns.
df[1:5,:]

Unnamed: 0_level_0,t,pop,j,q,s,d,d2,x1,x2
Unnamed: 0_level_1,Int64,Int64,String7,Int64,Float64,Float64,Float64,Float64,Float64
1,12069031000,2453,105001,80,0.0166521,12.584,158.357,4.269,0.285
2,12069030306,1853,105001,80,0.0166521,7.82126,61.1721,4.269,0.285
3,12069030207,1538,105001,80,0.0166521,4.29014,18.4053,4.269,0.285
4,12069030912,1513,105001,80,0.0166521,5.08819,25.8897,4.269,0.285
5,12069030410,1075,105001,80,0.0166521,18.8484,355.263,4.269,0.285


---
### facility-level data

In [120]:
# Collapse the table to one row per facility `j`.
# q, s, x1 and x2 take the first value within each facility group; auxd is the
# log of the facility's mean distance `d` across its rows.
dfj = combine(
    groupby(df, :j),
    :j => first => :j,
    :q => first => :q,
    :s => first => :s,
    :d => (dist -> log(mean(dist))) => :auxd,
    :x1 => first => :x1,
    :x2 => first => :x2,
)
# Add the squared log-mean-distance term in place.
dfj.auxd2 = dfj.auxd .^ 2

first(dfj, 3)

Unnamed: 0_level_0,j,q,s,auxd,x1,x2,auxd2
Unnamed: 0_level_1,String7,Int64,Float64,Float64,Float64,Float64,Float64
1,105001,80,0.0166521,2.40807,4.269,0.285,5.7988
2,105005,81,0.00252011,2.44936,4.327,0.839,5.99935
3,105009,54,0.00223139,2.53129,3.948,0.405,6.40744


### non-BLP, shares and quantity (works)

In [84]:
# Number of NaN entries in the facility quantity column.
count(isnan, dfj.q)

0

In [85]:
# Round trip on shares: obtain deltas from the observed shares via m.loop, map
# them back through m.simple_shares, and check the result matches dfj.s.
deltas = m.loop(dfj[!, :s])
impliedshares = deltas |> m.simple_shares
all(impliedshares .≈ dfj.s)

iterations:2


true

In [86]:
# Same round trip, driven by the facility quantities dfj.q instead of shares.
# (`impliedshares` therefore holds values on the scale of q in this cell.)
deltas = m.loop(dfj[!, :q])
impliedshares = deltas |> m.simple_shares
all(impliedshares .≈ dfj.q)

iterations:2


true

---
BLP  
What's the X?  
Some NaNs, but those that converge do converge.

In [87]:
# Inputs for the random-coefficient (BLP) cells that follow.
nI = 1000;                        # number of random-coefficient draws (columns of ζ)
X = hcat(dfj.auxd, dfj.auxd2);    # distance characteristics: log mean distance and its square
Z = hcat(dfj.x1, dfj.x2);         # facility covariates x1, x2 (not used in the cells shown below)
K = size(X, 2);                   # number of X (distance) characteristics, derived from X so
                                  # the draw matrix cannot fall out of sync with it
# Draw from an explicitly seeded generator: the resulting deltas are serialized
# for a follow-up notebook, so re-running this cell must give the same ζ.
rng = MersenneTwister(1234);
ζ = rand(rng, Normal(0, 1), K, nI);   # K × nI standard-normal draws
# disutility of distance
β = [-1.0, -2.0];

BLP with shares of j's own market


In [91]:
# Random-coefficient version, driven by the observed shares dfj.s.
deltas = m.loop(dfj.s, X, ζ, β; verbose=true)

# Positions of NaN deltas and how many there are.
nanind = findall(isnan, deltas)
println("nancount: ", count(isnan, deltas), " out of ", nrow(dfj))
# println(mean(dfj[nanind, :auxd]))
# Keep only the non-NaN facilities and check that the shares implied by their
# deltas match the observed ones.
nmind = @. !isnan(deltas);
impliedshares = m.shares(deltas[nmind], X[nmind, :], ζ, β)
all(impliedshares .≈ dfj.s[nmind])


iterations: 2
dist: 7.105427357601002e-15
deltas:[-31.955695855170653, -35.062493295409034, -37.66829460903063, -33.07961670817093, -36.29011582729175]
s_:    [0.01665209599999998, 0.0025201065000000065, 0.0022313865000000046, 0.003315309800000012, 0.004353771000000013]
nancount: 0 out of 410


true

BLP with facility level quantities

In [93]:
# Random-coefficient version, driven by the facility quantities dfj.q.
deltas = m.loop(dfj.q, X, ζ, β; verbose=true)
nancount = count(isnan, deltas)
println("nancount: ", nancount, " out of ", nrow(dfj))
# When some deltas are NaN, report the mean auxd of the affected facilities.
if nancount > 0
    nanind = findall(isnan, deltas)
    println(mean(dfj.auxd[nanind]))
end
# Keep only the non-NaN facilities and check that the quantities implied by
# their deltas match the observed ones.
nmind = @. !isnan(deltas);
impliedq = m.shares(deltas[nmind], X[nmind, :], ζ, β)
all(impliedq .≈ dfj.q[nmind])

iterations: 2
dist: 7.105427357601002e-15
deltas:[-23.478450035904014, -24.68459002428954, -27.574178424629416, -23.363079317224575, -26.05761236882739]
s_:    [79.9999999999999, 80.99999999999993, 54.000000000000206, 55.00000000000009, 120.99999999999996]
nancount: 0 out of 410


true

In [94]:
# Persist the latest deltas and the facility-level table to the working directory.
for (path, obj) in (("deltas.jls", deltas), ("dfj.jls", dfj))
    serialize(path, obj)
end
#continued in ./gmm.ipynb