Statistical Power

[1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy.random import RandomState

from azcausal.core.panel import CausalPanel
from azcausal.core.effect import get_true_effect
from azcausal.core.error import JackKnife
from azcausal.core.parallelize import Joblib
from azcausal.data import CaliforniaProp99
from azcausal.estimators.panel.did import DID
from azcausal.util import zeros_like
from azcausal.util.analysis import f_power

First, let us load the data you want to analyze the power on. Make sure that no units are already treated in this data set at anytime.

[2]:
panel = CaliforniaProp99().panel().filter(contr=True)

print(panel.summary())
╭──────────────────────────────────────────────────────────────────────────────╮
|                                    Panel                                     |
|  Time Periods: 31 (31/0)                                   total (pre/post)  |
|  Units: 38 (38/0)                                       total (contr/treat)  |
╰──────────────────────────────────────────────────────────────────────────────╯

Next, we define a function which will create a sample of our original panel and add some treatment.

[3]:

class Function: def __init__(self, panel, att, seed) -> None: super().__init__() self.panel = panel self.att = att self.seed = seed def __call__(self, *args, **kwargs): panel = self.panel # parameters seed = self.seed att = self.att # constants conf = 90 n_treat = 5 n_post = 12 # random seed for reproducibility random_state = RandomState(seed) # define what is treated and when treat_units = random_state.choice(np.arange(panel.n_units()), replace=False, size=n_treat) intervention = zeros_like(panel.intervention) intervention.iloc[-n_post:, treat_units] = 1 te = panel.outcome * intervention * (att / 100) outcome = panel.outcome + te # create the new panel with the new intervention panel = CausalPanel(data=dict(intervention=intervention, te=te, outcome=outcome)).setup() # use the estimator to get the effect true_effect = get_true_effect(panel) # run the estimator to get the predicted effect estimator = DID() result = estimator.fit(panel) estimator.error(result, JackKnife()) pred_effect = result.effect # create an output dictionary of what is true and what we have measured res = dict(**pred_effect.to_dict(prefix='pred_', conf=conf), **true_effect.to_dict(prefix='true_', conf=conf)) res.update(dict(att=att, seed=seed)) return res

Power

Then, we create a generator which creates the runs. For each iteration we initialize the Function, we have defined above.

[4]:

# the number of samples used for measuring power n_samples = 100 # create all runs for this analysis (this can potentially include more dimensions as well) def g_power(): for seed in range(n_samples): yield panel, -20, seed

Because running this sequentially is usually pretty slow, we make use of the Parallelize interface in azcausal to speed things up.

[5]:
parallelize = Joblib(prefer='processes', progress=True)
results = parallelize.run([Function(*args) for args in g_power()])

len(results)
100%|██████████| 100/100 [00:04<00:00, 21.04it/s]
[5]:
100

And then we can analyze the resulting power and also coverage of the confidence intervals.

[6]:
dx = (pd.DataFrame(results)
      .assign(true_in_ci=lambda dd: dd['true_avg_te'].between(dd['pred_avg_ci_lb'], dd['pred_avg_ci_ub']))
      .assign(avg_te_error=lambda dd: dd['true_avg_te'] - dd['pred_avg_te'])
      .assign(rel_te_error=lambda dd: dd['true_rel_te'] - dd['pred_rel_te'])
      )

# get the power from the results
power = f_power(dx.assign(sign=lambda dd: dd['pred_sign']))

print("Power")
print(f"(+) {power['+']:.2%}")
print(f"(+/-) {power['+/-']:.2%}")
print(f"(-) {power['-']:.2%}")

print()

coverage = dx['true_in_ci'].mean()
print(f"Coverage: {coverage:.1%}")

avg_te_rmse = np.sqrt((dx['avg_te_error'] ** 2).mean())
print(f"Average TE RMSE: {avg_te_rmse}")

rel_te_rmse = np.sqrt((dx['rel_te_error'] ** 2).mean())
print(f"Relative TE RMSE: {rel_te_rmse}")
Power
(+) 0.00%
(+/-) 5.00%
(-) 95.00%

Coverage: 89.0%
Average TE RMSE: 7.986427459857413
Relative TE RMSE: 0.05869881281193749

Power Analysis

We estimate our statistical power to be around 95% given the setup above. In addition, we might want to be a little more systematic and answer the question of how much power does a specific ATT parameter have (this can be extended to any parameter such as number of treatment regions or post time periods).

[7]:
def g_power_analysis():
    for att in np.linspace(-30, 30, 13):
        for seed in range(n_samples):
            yield att, seed


parallelize = Joblib(prefer='processes', progress=True)
results = parallelize.run([Function(panel, *args) for args in g_power_analysis()])
100%|██████████| 1300/1300 [00:45<00:00, 28.35it/s]
[8]:
dx.columns
[8]:
Index(['pred_effect', 'pred_sign', 'pred_avg_te', 'pred_avg_ci_lb',
       'pred_avg_ci_ub', 'pred_rel_te', 'pred_rel_ci_lb', 'pred_rel_ci_ub',
       'pred_perc_te', 'pred_perc_ci_lb', 'pred_perc_ci_ub', 'pred_cum_te',
       'pred_cum_ci_lb', 'pred_cum_ci_ub', 'true_effect', 'true_sign',
       'true_avg_te', 'true_avg_ci_lb', 'true_avg_ci_ub', 'true_rel_te',
       'true_rel_ci_lb', 'true_rel_ci_ub', 'true_perc_te', 'true_perc_ci_lb',
       'true_perc_ci_ub', 'true_cum_te', 'true_cum_ci_lb', 'true_cum_ci_ub',
       'att', 'seed', 'true_in_ci', 'avg_te_error', 'rel_te_error'],
      dtype='object')
[9]:
dx = (pd.DataFrame(results)
      .assign(true_in_ci=lambda dd: dd['true_avg_te'].between(dd['pred_avg_ci_lb'], dd['pred_avg_ci_ub']))
      .assign(perc_te_error=lambda dd: dd['pred_perc_te'] - dd['true_perc_te'])
      )

# get the power and coverage for each group now
pw = dx.assign(sign=lambda dd: dd['pred_sign']).groupby('att').apply(f_power).sort_index().reset_index()
coverage = dx.groupby('att')['true_in_ci'].mean()
error = dx.groupby('att').aggregate(mean=('perc_te_error', 'mean'), se=('perc_te_error', 'sem'))
[10]:
fig, (top, middle, bottom) = plt.subplots(3, 1, figsize=(12, 8), sharex=True)

fig.suptitle(f'CaliforniaProp99', fontsize=16)

top.plot(pw['att'], pw['-'], "-o", color="red", label='-')
top.plot(pw['att'], pw['+'], "-o", color="green", label='+')
top.plot(pw['att'], pw['+/-'], "-o", color="black", label='+/-', alpha=0.5)
top.axhline(1.0, color="black", alpha=0.15)
top.axhline(0.9, color="black", alpha=0.15, linestyle='--')
top.axhline(0.0, color="black", alpha=0.15)
top.set_ylim(-0.05, 1.05)
top.set_xlabel("ATT (%)")
top.set_ylabel("Statistical Power")
top.legend()

middle.plot(coverage.index, coverage.values, "-o", color="black", label="coverage")
middle.axhline(1.0, color="black", alpha=0.15)
middle.axhline(0.0, color="black", alpha=0.15)
middle.set_ylim(-0.05, 1.05)
middle.set_xlabel("ATT (%)")
middle.set_ylabel("Coverage")
middle.legend()


bottom.plot(error.index, np.zeros(len(error)), color='black', alpha=0.7)
bottom.plot(error.index, error['mean'], '-o', color='red')
bottom.errorbar(error.index, error['mean'], error['se'], color='red', alpha=0.5, barsabove=True)
bottom.set_xlabel("ATT (%)")
bottom.set_ylabel("Error")

plt.tight_layout()
plt.show()
_images/power_18_0.png