---
title: "Doubly Robust Proximal Synthetic Control: Notes on Qiu et al (2022)"
author: "Apoorva Lal"
format:
html:
code-fold: show
code-background: true
code-overflow: wrap
code-tools: true
code-link: true
highlight-style: arrow
toc: true
toc-location: body
max-width: 1600px
mainfont: IBM Plex Sans
monofont: Iosevka
number-sections: true
html-math-method: katex
colorlinks: true
embed-resources: true
execute:
cache: true
warning: false
---
[ Paper ](https://arxiv.org/abs/2210.02014)
Code sliced and subset from [ the paper's replication archive ](https://github.com/QIU-Hongxiang-David/DR_Proximal_SC) - please do this so people understand your methods.
::: {.hidden}
$$
% typographic
\newcommand\Bigbr[ 1 ] {\left[ #1 \right ] }
\newcommand\Bigcr[ 1 ] {\left\{ #1 \right \} }
\newcommand\Bigpar[ 1 ] {\left( #1 \right )}
\newcommand\Obr[ 2 ] { \overbrace{#1}^{\text{#2}}}
\newcommand\Realm[ 1 ] {\mathbb{R}^{#1}}
\newcommand\Ubr[ 2 ] {\underbrace{#1}_{\text{#2}}}
\newcommand\wb[ 1 ] {\overline{#1}}
\newcommand\wh[ 1 ] {\widehat{#1}}
\newcommand\wt[ 1 ] {\widetilde{#1}}
\newcommand\Z{\mathbb{Z}}
\newcommand\dd{\mathsf{d}}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\newcommand\epsi{\varepsilon}
\newcommand\phii{\varphi}
\newcommand\tra{^{\top}}
\newcommand\sumin{\sum_{i=1}^n}
\newcommand\sumiN{\sum_{i=1}^n}
\def\mbf#1{\mathbf{#1}}
\def\mbi#1{\boldsymbol{#1}}
\def\mrm#1{\mathrm{#1}}
\def\ve#1{\mbi{#1}}
\def\vee#1{\mathbf{#1}}
\newcommand\R{\mathbb{R}}
\newcommand\Ol[ 1 ] {\overline{#1}}
\newcommand\SetB[ 1 ] {\left\{ #1 \right\} }
\newcommand\Sett[ 1 ] {\mathcal{#1}}
\newcommand\Str[ 1 ] {#1^{*}}
\newcommand\Ul[ 1 ] {\underline{#1}}
% stats
\newcommand\Prob[ 1 ] {\mathbf{Pr}\left(#1\right)}
\def\Exp#1{{\mathbb{E}\left[ #1\right ] }}
\newcommand\var{\mathrm{Var}}
\newcommand\Supp[ 1 ] {\text{Supp}\left[ #1\right ] }
\newcommand\Var[ 1 ] {\mathbb{V}\left[ #1\right ] }
\newcommand\Expt[ 2 ] {\mathbb{E}_{#1}\left[ #2\right ] }
\newcommand\Covar[ 1 ] {\text{Cov}\left[ #1\right ] }
\newcommand\Cdf[ 1 ] {\mathbb{F}\left(#1\right)}
\newcommand\Cdff[ 2 ] {\mathbb{F}_{#1}\left(#2\right)}
\newcommand\F{\mathsf{F}}
\newcommand\ff{\mathsf{f}}
\newcommand\Data{\mathcal{D}}
\newcommand\Pdf[ 1 ] {\mathsf{f}\left(#1\right)}
\newcommand\Pdff[ 2 ] {\mathsf{f}_{#1}\left(#2\right)}
\newcommand\doo[ 1 ] {\Prob{Y |\,\mathsf{do} (#1) }}
\newcommand{\indep}{\perp \!\!\! \perp}
\newcommand\doyx{\Prob{Y \, |\,\mathsf{do} (X = x)}}
% distributions
\newcommand\Bern[ 1 ] {\mathsf{Bernoulli} \left( #1 \right )}
\newcommand\BetaD[ 1 ] {\mathsf{Beta} \left( #1 \right )}
\newcommand\Binom[ 1 ] {\mathsf{Bin} \left( #1 \right )}
\newcommand\Diri[ 1 ] {\mathsf{Dir} \left( #1 \right )}
\newcommand\Gdist[ 1 ] {\mathsf{Gamma} \left( #1 \right )}
\newcommand\Normal[ 1 ] {\mathcal{N} \left( #1 \right )}
\newcommand\Pois[ 1 ] {\mathsf{Poi} \left( #1 \right )}
\newcommand\Unif[ 1 ] {\mathsf{U} \left[ #1 \right ] }
% calc and linalg
\newcommand\dydx[ 2 ] {\frac{\partial #1}{\partial #2}}
\newcommand\Mat[ 1 ] {\mathbf{#1}}
\newcommand\normm[ 1 ] {\lVert #1 \rVert}
\newcommand\abs[ 1 ] {\vert#1\vert}
\newcommand\eucN[ 1 ] {\normm{#1}}
\newcommand\pnorm[ 1 ] {\normm{#1}_p}
\newcommand\lone[ 1 ] {\normm{#1}_1}
\newcommand\ltwo[ 1 ] {\normm{#1}_2}
\newcommand\lzero[ 1 ] {\normm{#1}_0}
\newcommand\Indic[ 1 ] {\mathbb{1}_{#1}}
$$
:::
```{r}
libreq (
magrittr, tidyverse, gmm,
# Synth,
CVXSynth,
splines, scales, ggplot2, ggpubr,
augsynth
)
data (kansas)
setDT (kansas)
kansas[, t : = (year-1990 )* 4 + qtr]
############################################################
# residualise on time trend
d1 = kansas[, .(fips, state, t, lngdpcapita)]
model = lm (lngdpcapita ~ poly (t, 2 ),
data = d1[state != "Kansas" ])
d1[, lngdpcapita.resid : = lngdpcapita - predict (model, d1)]
Y = d1[state == "Kansas" , lngdpcapita.resid]
T = nunique (d1$ t)
t = 1 : T
T0 = (2012 - 1990 ) * 4 + 1
treatment = as.numeric (t > T0)
X = dcast (
d1[state != "Kansas" , .(lngdpcapita.resid, state, t)],
t ~ state, value.var = "lngdpcapita.resid" ) %>%
.[,t : = NULL ] %>% as.matrix ()
```
## Synthetic Control
Simplex regression of $Y_{it} \sim \Mat{W}_{it}$ where coefficients are constrained to be nonnegative and sum to 1. This generates some regularisation.
```{r}
synth_weights = sc_solve (matrix (Y[t <= T0], ncol = 1 ), X[t <= T0, ])
data.frame (colnames (X), synth_weights)
```
```{r}
Abadie.SC = as.numeric (X %*% synth_weights)
(phi.hat = mean (Y[t > T0] - Abadie.SC[t > T0]))
```
Select donors with >0.1 weights
```{r}
donors = c ("North Dakota" , "South Carolina" , "Texas" , "Washington" )
# outcome series for donors
W = X[, donors]
# outcome series for everyone else
Z = X[, setdiff (colnames (X), c ("Kansas" , donors))]
data = list (t = t, W = W,
Z = Z, Y = Y, T = T,
T0 = T0, treatment = treatment)
```
## OLS
Regress pre-treatment outcomes for Kansas on all other units.
```{r}
OLS = function (data, vcovfun = c ("vcovHAC" , "NeweyWest" )) {
model = with (data, lm (Y ~ W + treatment))
phi.hat = coef (model)["treatment" ]
SC = as.numeric (cbind (1 , data$ W) %*% coef (model)[1 : (ncol (data$ W) + 1 )])
list (phi.hat = phi.hat, SC = SC)
}
OLS.result = OLS (data)
```
## Outcome Bridge
```{r}
PI.h = function (data, vcovfun = c ("vcovHAC" , "NeweyWest" )) {
vcovfun = match.arg (vcovfun)
T = data$ T
T0 = data$ T0
t = data$ t
nW = ncol (data$ W)
evalh = \(theta, data) as.numeric (cbind (1 , data$ W) %*% theta[1 : (nW + 1 )])
gh = cbind (1 , data$ Z)
ngh = ncol (gh)
###################################################################
# moment function and gradient
###################################################################
g = \(theta, data) {
h = evalh (theta, data)
g1 = T / T0 * (t <= T0) * (data$ Y - h) * gh
g2 = (t > T0) * (theta[nW + 2 ] - (data$ Y - h))
cbind (g1, g2)
}
gradv = \(theta, data) {
h = evalh (theta, data)
out = matrix (0 , ngh + 1 , nW + 2 )
# g1
out[1 : ngh, 1 : (nW + 1 )] = - t (gh) %*% (cbind (1 , data$ W) * (t <= T0)) / T0
# g2
out[ngh + 1 , nW + 2 ] = (T - T0) / T
out[ngh + 1 , 1 : (nW + 1 )] = colMeans ((t > T0) * cbind (1 , data$ W))
out
}
###################################################################
# warm start with lm
init.model.h = lm (data$ Y ~ ., data = as.data.frame (data$ W), subset = t <= T0)
init.alpha = coef (init.model.h)
init.phi = mean (data$ Y[t > T0] -
predict (init.model.h,
newdata = as.data.frame (data$ W[t > T0, , drop = FALSE ])))
theta0 = c (init.alpha, init.phi)
names (theta0) = c (paste0 ("alpha" , 0 : nW), "phi" )
###################################################################
# gmm
model = gmm (
g = g,
gradv = gradv,
x = data,
t0 = theta0,
wmatrix = "ident" ,
control = list (maxit = 1e5 ),
method = "BFGS" , vcov = "iid" )
###################################################################
cat ("Parameter estimate: \n " )
print (coef (model))
phi.hat = coef (model)["phi" ]
# vcov HAC
var = if (vcovfun == "vcovHAC" ) {
sandwich:: vcovHAC (model)
} else {
sandwich:: NeweyWest (model, prewhite = FALSE )
}
rownames (var) = colnames (var) = names (theta0)
SE = sqrt (var["phi" , "phi" ])
CI = phi.hat + qnorm (c (.025 , .975 )) * SE
SC = evalh (coef (model), data)
list (phi.hat = phi.hat, SE = SE, CI = CI,
convergence = model$ algoInfo$ convergence, SC = SC)
}
h.result = PI.h (data)
```
## Both Outcome Bridge and Treatment Bridge
Involves solving the following moment condition problem [ probably won't make sense without reading the paper ] .

```{r}
DR = function (data,
Z.states,
vcovfun = c ("vcovHAC" , "NeweyWest" )) {
vcovfun = match.arg (vcovfun)
T = data$ T
T0 = data$ T0
t = data$ t
nW = ncol (data$ W)
nZ = ncol (data$ Z[, Z.states, drop = FALSE ])
# confounding bridge functions
evalh = \(theta, data) as.numeric (cbind (1 , data$ W) %*% theta[1 : (nW + 1 )])
evalq = \(theta, data) {
as.numeric (
exp (
cbind (1 , data$ Z[, Z.states, drop = FALSE ]) %*% theta[(nW + 2 ): (nW + nZ + 2 )]
)
)
}
gh = cbind (1 , data$ Z)
ngh = ncol (gh)
gq = cbind (1 , data$ W)
ngq = ncol (gq)
######################################################################
# moment function
g = function (theta, data) {
h = evalh (theta, data)
q = evalq (theta, data)
ϕ = theta[nW + nZ + ngq + 3 ]
# centering parameters
ψ = matrix (theta[(nW + nZ + 3 ): (nW + nZ + ngq + 2 )], nrow = T, ncol = ngq, byrow = TRUE )
ψm = theta[nW + nZ + ngq + 4 ]
# h regresses y on W
g1 = (t <= T0) * (data$ Y - h) * gh
g2 = (t > T0) * (ψ - gq)
g3 = (t <= T0) * (q * gq - ψ)
g4 = (t > T0) * (ϕ - (data$ Y - h) + ψm)
g5 = (t <= T0) * (ψm - q * (data$ Y - h))
cbind (g1, g2, g3, g4, g5)
}
# gradient - try autodiff?
gradv = function (theta, data) {
h = evalh (theta, data)
q = evalq (theta, data)
out = matrix (0 , ngh + 2 * ngq + 2 , nW + nZ + ngq + 4 )
# g1
out[1 : ngh, 1 : (nW + 1 )] = - t (gh) %*% (cbind (1 , data$ W) * (t <= T0)) / T
# g2
out[(ngh + 1 ): (ngh + ngq), (nW + nZ + 3 ): (nW + nZ + ngq + 2 )] = diag ((T - T0) / T, ngq)
# g3
out[(ngh + ngq + 1 ): (ngh + 2 * ngq), (nW + nZ + 3 ): (nW + nZ + ngq + 2 )] = diag (- T0 / T, ngq)
M1 = t (gq)
M2 = q * (t <= T0) * cbind (1 , data$ Z[, Z.states, drop = FALSE ]) / T
out[(ngh + ngq + 1 ): (ngh + 2 * ngq), (nW + 2 ): (nW + nZ + 2 )] = M1 %*% M2
# g4
out[ngh + 2 * ngq + 1 , nW + nZ + ngq + 3 ] = out[ngh + 2 * ngq + 1 , nW + nZ + ngq + 4 ] = (T - T0) / T
out[ngh + 2 * ngq + 1 , 1 : (nW + 1 )] = colMeans (cbind (1 , data$ W) * (t > T0))
# g5
out[ngh + 2 * ngq + 2 , nW + nZ + ngq + 4 ] = T0 / T
out[ngh + 2 * ngq + 2 , 1 : (nW + 1 )] = colMeans (q * cbind (1 , data$ W) * (t <= T0))
out[ngh + 2 * ngq + 2 , (nW + 2 ): (nW + nZ + 2 )] = colMeans (- q * (data$ Y - h) * cbind (1 , data$ Z[, Z.states, drop = FALSE ]) * (t <= T0))
out
}
######################################################################
# warm start
init.model.h = lm (data$ Y ~ ., data = as.data.frame (data$ W), subset = t <= T0)
init.alpha = coef (init.model.h)
init.model.q = glm (I (t > T0) ~ .,
data = as.data.frame (data$ Z[, Z.states, drop = FALSE ]), family = binomial ())
init.beta = coef (init.model.q)
init.beta[1 ] = init.beta[1 ] + log ((T - T0) / T0)
init.psi = colMeans (gq[t > T0, , drop = FALSE ])
init.psi.minus = mean (exp (cbind (1 , data$ Z[t <= T0, Z.states, drop = FALSE ]) %*% init.beta) *
(data$ Y[t <= T0] - predict (init.model.h, newdata = as.data.frame (data$ W[t <= T0, , drop = FALSE ]))))
init.phi = mean (data$ Y[t > T0] -
predict (init.model.h, newdata = as.data.frame (data$ W[t > T0, , drop = FALSE ]))) - init.psi.minus
theta0 = c (init.alpha, init.beta, init.psi, init.phi, init.psi.minus)
names (theta0) = c (paste0 ("alpha" , 0 : nW),
paste0 ("beta" , 0 : nZ),
paste0 ("psi" , 1 : ngq),
"phi" , "psi-" )
######################################################################
# gmm
model = gmm (
g = g,
x = data,
t0 = theta0,
gradv = gradv,
wmatrix = "ident" ,
control = list (maxit = 1e5 ), method = "BFGS" , vcov = "iid" )
cat ("Parameter estimate: \n " )
print (coef (model))
cat ("Summary of estimated q(Zt): \n " )
print (summary (evalq (coef (model), data)[t <= T0]))
phi.hat = coef (model)["phi" ]
# vcov HAC
var = if (vcovfun == "vcovHAC" ) {
sandwich:: vcovHAC (model)
} else {
sandwich:: NeweyWest (model, prewhite = FALSE )
}
rownames (var) = colnames (var) = names (theta0)
SE = sqrt (var["phi" , "phi" ])
CI = phi.hat + qnorm (c (.025 , .975 )) * SE
SC = evalh (coef (model), data)
list (phi.hat = phi.hat, SE = SE, CI = CI, convergence = model$ algoInfo$ convergence, SC = SC)
}
```
Three candidates for $Z$ - sets that are outside the donor set from
synth and can therefore be used to calibrate $q$.
```{r}
DR.Iowa.result = DR (data, "Iowa" )
```
```{r}
DR.Iowa.SouthDakota.result = DR (data, c ("South Dakota" , "Iowa" ))
```
```{r}
DR.Iowa.SouthDakota.Oklahoma.result =
DR (data, c ("South Dakota" , "Iowa" , "Oklahoma" ))
```
## summary figure
```{r}
plot.d = data.frame (t = data$ t) %>%
mutate (
` Abadies SC ` = Abadie.SC,
DR = DR.Iowa.result$ SC,
DR2 = DR.Iowa.SouthDakota.result$ SC,
DR3 = DR.Iowa.SouthDakota.Oklahoma.result$ SC,
` outcome bridge ` = h.result$ SC,
OLS = OLS.result$ SC
) %>%
pivot_longer (! t, names_to = "method" , values_to = "outcome" ) %>%
arrange (method, t) %>%
mutate (Y = rep (data$ Y, times = 6 ))
colors = hue_pal ()(2 )
ggplot (plot.d, aes (x = 1990 + t * .25 , y = Y)) +
geom_line (color = colors[1 ], linewidth = 1 , alpha = .5 ) +
geom_line (aes (x = 1990 + t * .25 , y = outcome), color = colors[2 ], linetype = "longdash" , linewidth = 1 ) +
geom_vline (xintercept = 1990 + (data$ T0) * .25 , linetype = "dotted" ) +
facet_wrap (~ method, labeller = "label_both" ) +
ylab ("Detrended log GDP per capita" ) +
xlab ("Year" ) +
scale_x_continuous (breaks = seq (1990 , 2010 , 10 )) +
theme_bw ()
```