Declare potential outcomes
declare_potential_outcomes( ..., handler = potential_outcomes_handler, label = NULL ) potential_outcomes.formula( formula, conditions = c(0, 1), assignment_variables = "Z", data, level = NULL, label = outcome_variable ) potential_outcomes.NULL( formula = stop("Not provided"), ..., data, level = NULL )
Argument | Description |
---|---|
... | arguments to be captured, and later passed to the handler |
handler | a tidy-in, tidy-out function |
label | a string describing the step |
formula | a formula to calculate potential outcomes as functions of assignment variables. |
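conditions | see `expand_conditions`. Provide values (e.g. `conditions = 1:4`) for a single assignment variable. For multiple assignment variables, provide a named list (e.g. `conditions = list(Z1 = 0:1, Z2 = 0:1)`). Defaults to `c(0, 1)`. |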
assignment_variables | The name of the assignment variable. Generally not required as names are taken from `conditions`. |
data | a data.frame |
level | a character specifying a level of hierarchy for fabricate to calculate at |
Value: a function that returns a data.frame.
A `declare_potential_outcomes` function is used to create outcomes that each unit would express in each possible treatment condition.
# Potential outcomes can be declared in two ways: # by using a formula or as separate variables. # Using a formula declare_population(N = 100, U = rnorm(N)) + declare_potential_outcomes(Y ~ 0.5*Z + U)#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.03 0.19 0.13 2.69 0.93 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + U) ------- #> #> Formula: Y ~ 0.5 * Z + U #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.03 0.19 0.13 2.69 0.93 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -1.53 0.69 0.63 3.19 0.93 0 100 #># As separate variables declare_population(N = 100, U = rnorm(N)) + declare_potential_outcomes(Y_Z_0 = U, Y_Z_1 = U + 0.5)#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -3.07 0.09 0.08 2.8 1.05 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y_Z_0 = U, Y_Z_1 = U + 0.5) #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -3.07 0.09 0.08 2.8 1.05 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -2.57 0.59 0.58 3.3 1.05 0 100 #># (notice the naming structure: outcome_assignment_condition: Y_Z_1) # You can change the name of the outcome declare_population(N = 100, U = rnorm(N)) + declare_potential_outcomes(Y2 ~ 0.5*Z + U)#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd 
N_missing N_unique #> -1.85 -0.15 -0.06 1.95 0.85 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y2 ~ 0.5 * Z + U) ------ #> #> Formula: Y2 ~ 0.5 * Z + U #> #> Added variable: Y2_Z_0 #> min median mean max sd N_missing N_unique #> -1.85 -0.15 -0.06 1.95 0.85 0 100 #> #> Added variable: Y2_Z_1 #> min median mean max sd N_missing N_unique #> -1.35 0.35 0.44 2.45 0.85 0 100 #># You can change the name of the assignment_variable declare_population(N = 100, U = rnorm(N)) + declare_potential_outcomes(Y ~ 0.5*D + U, assignment_variable = "D")#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.03 0.12 0.1 2.1 0.98 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * D + U, assignment_variable = "D") #> #> Formula: Y ~ 0.5 * D + U #> #> Added variable: Y_D_0 #> min median mean max sd N_missing N_unique #> -2.03 0.12 0.1 2.1 0.98 0 100 #> #> Added variable: Y_D_1 #> min median mean max sd N_missing N_unique #> -1.53 0.62 0.6 2.6 0.98 0 100 #># `conditions` defines the "range" of the potential outcomes function declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) + declare_potential_outcomes(formula = Y ~ .05 + .25 * Z + .01 * age * Z, conditions = 1:4)#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: age #> min median mean max sd N_missing N_unique #> 18 39 40.11 65 14.83 0 44 #> #> Step 2 (potential outcomes): declare_potential_outcomes(formula = Y ~ 0.05 + 0.25 * Z + 0.01 * age * Z, conditions = 1:4) #> #> Formula: Y ~ 0.05 + 0.25 * Z + 0.01 * age * Z #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> 
0.48 0.69 0.7 0.95 0.15 0 44 #> #> Added variable: Y_Z_2 #> min median mean max sd N_missing N_unique #> 0.91 1.33 1.35 1.85 0.3 0 44 #> #> Added variable: Y_Z_3 #> min median mean max sd N_missing N_unique #> 1.34 1.97 2 2.75 0.44 0 44 #> #> Added variable: Y_Z_4 #> min median mean max sd N_missing N_unique #> 1.77 2.61 2.65 3.65 0.59 0 44 #># Multiple assignment variables can be specified in `conditions`. For example, # in a 2x2 factorial potential outcome: declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) + declare_potential_outcomes(formula = Y ~ .05 + .25 * Z1 + .01 * age * Z2, conditions = list(Z1 = 0:1, Z2 = 0:1))#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: age #> min median mean max sd N_missing N_unique #> 18 46 43.02 65 14.29 0 39 #> #> Step 2 (potential outcomes): declare_potential_outcomes(formula = Y ~ 0.05 + 0.25 * Z1 + 0.01 * age * Z2, conditions = list(Z1 = 0:1, Z2 = 0:1)) #> #> Formula: Y ~ 0.05 + 0.25 * Z1 + 0.01 * age * Z2 #> #> Added variable: Y_Z1_0_Z2_0 #> 0.05 #> 100 #> 1.00 #> #> Added variable: Y_Z1_1_Z2_0 #> 0.3 #> 100 #> 1.00 #> #> Added variable: Y_Z1_0_Z2_1 #> min median mean max sd N_missing N_unique #> 0.23 0.51 0.48 0.7 0.14 0 39 #> #> Added variable: Y_Z1_1_Z2_1 #> min median mean max sd N_missing N_unique #> 0.48 0.76 0.73 0.95 0.14 0 39 #>