This document shows some things researchers may wish to add to their preanalysis plans using design declaration and diagnosis.
In [1]: import numpy as np
...: import scipy as sp
...: import statsmodels.api as sm
...: import statsmodels.formula.api as smf
...: import pandas as pd
...: import numpy as np
...: import seaborn as sns
In [2]: def draw_data(sample_size = 100, effect_size = 0.5, number_treated = 50):
...: # Model
...:
...: # Population
...: pop = pd.DataFrame({
...: 'noise' : sp.stats.norm(loc = 0, scale = 1).rvs(sample_size)})
...:
...: # Potential outcomes
...: pop['Y_Z_0'] = pop['noise']
...: pop['Y_Z_1'] = pop['noise'] + effect_size
...:
...: # Data Strategy
...:
...: # Assignment strategy
...: pop['Z'] = np.random.choice(np.concatenate(([1] * 50, [0] * 50)), sample_size, replace = False)
...:
...: # Reveal outcomes
...: pop['Y'] = pop['Y_Z_0'] * (1 - pop['Z']) + pop['Y_Z_1'] * pop['Z']
...:
...: return pop;
...:
...:
...: def fit_model(data):
...: # Answer strategy
...: model_fit = smf.ols('Y ~ Z', data = data).fit()
...:
...: fit = model_fit.get_robustcov_results(cov_type='HC2')
...:
...: return fit;
...:
...: def diagnose_design(sims = 500):
...:
...: p_values = []
...:
...: for x in range(sims):
...:
...: data = draw_data()
...:
...: fit = fit_model(data)
...:
...: p_values = np.append(p_values, fit.pvalues[1])
...:
...: power = np.mean(p_values < .05)
...:
...: return power;
In [3]: df = draw_data()
...: df['treatment'] = np.where(df['Z']==1, 'treatment', 'control')
...:
...: one_run = draw_data()
...: one_run['treatment'] = np.where(one_run['Z']==1, 'treatment', 'control')
...:
...: sns_plot = sns.catplot(x = "treatment", y = "Y", data = df)
...: sns_plot.savefig("figures/two_arm_design_stata.png")
...: fig.save
...: sns.catplot(x = "treatment", y = "Y", data = df, kind = 'point')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-3-f6d59cbb1adf> in <module>
7 sns_plot = sns.catplot(x = "treatment", y = "Y", data = df)
8 sns_plot.savefig("figures/two_arm_design_stata.png")
----> 9 fig.save
10 sns.catplot(x = "treatment", y = "Y", data = df, kind = 'point')
NameError: name 'fig' is not defined
Dep. Variable: | Y | R-squared: | 0.044 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.034 |
Method: | Least Squares | F-statistic: | 4.530 |
Date: | Wed, 16 Jan 2019 | Prob (F-statistic): | 0.0358 |
Time: | 19:21:07 | Log-Likelihood: | -146.86 |
No. Observations: | 100 | AIC: | 297.7 |
Df Residuals: | 98 | BIC: | 302.9 |
Df Model: | 1 | ||
Covariance Type: | HC2 |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | -0.0019 | 0.151 | -0.012 | 0.990 | -0.302 | 0.298 |
Z | 0.4519 | 0.212 | 2.128 | 0.036 | 0.031 | 0.873 |
Omnibus: | 2.835 | Durbin-Watson: | 2.166 |
---|---|---|---|
Prob(Omnibus): | 0.242 | Jarque-Bera (JB): | 2.030 |
Skew: | 0.167 | Prob(JB): | 0.362 |
Kurtosis: | 2.387 | Cond. No. | 2.62 |
0.696