In [1]:
using DataFrames, DataFramesMeta, LinearAlgebra, Distributions, Optim, Revise, Plots, CSV

In [2]:
const datadir = "/export/storage_adgandhi/MiscLi/factract";

In [3]:
string(@__DIR__) in LOAD_PATH || push!(LOAD_PATH, @__DIR__)
using BLPmodule;
const m = BLPmodule;

## data

In [4]:
data = DataFrame(CSV.File("$datadir/analysis/factract_FL.csv"));

In [5]:
names(data)

18-element Vector{String}:
 "facid"
 "tractid"
 "dist"
 "nfacs"
 "county"
 "paymcare"
 "occpct"
 "totbeds"
 "dchrppd"
 "rnhrppd"
 "avg_dailycensus"
 "lat"
 "lon"
 "restot"
 "state"
 "cz"
 "pop65plus_int"
 "poptot_int"

In [6]:
df = @select(data, 
    :t = :tractid, 
    :pop = :pop65plus_int,
    :j = :facid, 
    :q = :avg_dailycensus,
    :d = :dist, 
    :d2 = :dist .^ 2,
    :x1 = :dchrppd,
    :x2 = :rnhrppd
);
first(df, 6)

Unnamed: 0_level_0,t,pop,j,q,d,d2,x1,x2
Unnamed: 0_level_1,Int64,Int64,String7,Float64,Float64,Float64,Float64,Float64
1,12001000200,149,105193,142.73,5.29689,28.057,3.823,0.183
2,12001000200,149,105460,110.89,7.97425,63.5887,4.274,0.373
3,12001000200,149,105664,109.3,5.15333,26.5569,3.717,0.112
4,12001000200,149,106046,115.94,5.89566,34.7589,4.375,0.234
5,12001000200,149,105571,112.13,7.20214,51.8708,3.668,0.233
6,12001000301,278,105460,110.89,8.08636,65.3891,4.274,0.373


In [7]:
gdf = groupby(df, :j);

In [8]:
pop_scale_factor = 0.1
df = @transform(gdf, :msize_j = sum(:pop) * pop_scale_factor)
@transform!(df, :s = :q ./ :msize_j);

In [9]:
names(df)

10-element Vector{String}:
 "t"
 "pop"
 "j"
 "q"
 "d"
 "d2"
 "x1"
 "x2"
 "msize_j"
 "s"

In [10]:
cols = Not(Cols(r"x", r"d"));

In [11]:
sort(df, [:j])[1:10, cols]

Unnamed: 0_level_0,t,pop,j,q,msize_j,s
Unnamed: 0_level_1,Int64,Int64,String7,Float64,Float64,Float64
1,12069030105,1941,105001,86.16,2311.1,0.0372809
2,12069030203,2915,105001,86.16,2311.1,0.0372809
3,12069030204,892,105001,86.16,2311.1,0.0372809
4,12069030206,739,105001,86.16,2311.1,0.0372809
5,12069030207,1538,105001,86.16,2311.1,0.0372809
6,12069030208,816,105001,86.16,2311.1,0.0372809
7,12069030209,1056,105001,86.16,2311.1,0.0372809
8,12069030306,1853,105001,86.16,2311.1,0.0372809
9,12069030307,1553,105001,86.16,2311.1,0.0372809
10,12069030308,1413,105001,86.16,2311.1,0.0372809


## 1. flow-level
### 1.a. non-BLP
#### 1.a.i. quantities (works)

In [12]:
# works
deltas = m.loop(df.q);
impliedshares = m.simple_shares(deltas)
println(impliedshares[1:6])
all(isapprox.(impliedshares, df.q))

iterations:2


[142.73000000000002, 110.89000000000001, 109.30000000000001, 115.93999999999998, 112.13000000000004, 110.89000000000001]


true

#### 1.a.ii. shares (works)

In [13]:
deltas = m.loop(df.s);
impliedshares = m.simple_shares(deltas)
println(impliedshares[1:6])
all(isapprox.(impliedshares, df.s))

iterations:2
[0.08282845868152275, 0.05527365167979264, 0.050639362490733864, 0.05967368366874259, 0.05589173561957931, 0.05527365167979264]


true

### 1.b.i. BLP

In [14]:
X = hcat(df.x1, df.x2);
nI = 1000 #number of consumer shocks to draw
K = 2 #number of characteristics
ζ = rand(Normal(0,1), K, nI);
β = [-1., -2];


In [15]:
#works
deltas = m.loop(df.s, X, ζ, β);
impliedshares = m.shares(deltas, X, ζ, β);
println(impliedshares[1:6])
all(isapprox.(df.s, impliedshares))

iterations:2


[0.08282845868152278, 0.05527365167979258, 0.05063936249073385, 0.059673683668742566, 0.055891735619579226, 0.05527365167979258]


true

---
## 2. facility-level data

In [38]:
dfj = @combine(groupby(df, :j), :j = first(:j), :q = first(:q), :s = first(:s), :auxd =  mean(:d),  :auxd2 = mean(:d)^2, :x1 = first(:x1), :x2 = first(:x2))
first(dfj, 3)

Unnamed: 0_level_0,j,q,s,auxd,auxd2,x1,x2
Unnamed: 0_level_1,String7,Float64,Float64,Float64,Float64,Float64,Float64
1,105193,142.73,0.0828285,5.31776,28.2786,3.823,0.183
2,105460,110.89,0.0552737,5.80543,33.703,4.274,0.373
3,105664,109.3,0.0506394,5.12819,26.2983,3.717,0.112


### non-BLP, shares and quantity (works)

In [39]:
deltas = m.loop(dfj.s)
impliedshares = m.simple_shares(deltas)
all(isapprox.(impliedshares, dfj.s))

iterations:2


true

In [40]:
deltas = m.loop(dfj.q)
impliedshares = m.simple_shares(deltas)
all(isapprox.(impliedshares, dfj.q))

iterations:2


true

---
BLP  
What's the X?  
Lots of NaN, but those that converge do converge.

In [47]:
X = hcat(dfj.auxd, dfj.auxd2);
Z = hcat(dfj.x1, dfj.x2);
ζ = rand(Normal(0,1), K, nI);
# disutility of distance
β = [-1., -2];

BLP with shares of j's own market


In [48]:
deltas = m.loop(dfj.s, X , ζ, β)
nanind = findall(isnan.(deltas))
println("nancount: ", sum(isnan.(deltas)), " out of ", nrow(dfj))
# println(mean(dfj[nanind, :auxd]))
nmind = .!isnan.(deltas);
impliedshares = m.shares(deltas[nmind], X[nmind, :], ζ, β)
all(isapprox.(impliedshares, dfj.s[nmind]))


iterations:2
nancount: 0 out of 394


true

BLP with facility level quantities

In [49]:
deltas = m.loop(dfj.q, X , ζ, β)
println("nancount: ", sum(isnan.(deltas)), " out of ", nrow(dfj))
nanind = findall(isnan.(deltas))
println(mean(dfj[nanind, :auxd]))
nmind = .!isnan.(deltas);
impliedq = m.shares(deltas[nmind], X[nmind, :], ζ, β)
all(isapprox.(impliedq, dfj.q[nmind]))

iterations:2
nancount: 0 out of 394
NaN


true

In [51]:
m.gmm(β, dfj.s, X, Z, ζ)


iterations:2
β: [-1.0, -2.0]
b: [3.0968756883230526, -6.092948093486484]


3.6245296260867255e-11

In [52]:
inv(X' * X )*( X'  * deltas)

2-element Vector{Float64}:
  3.166515328924106
 -5.883207931994491

---
