Declare potential outcomes

declare_potential_outcomes(
  ...,
  handler = potential_outcomes_handler,
  label = NULL
)

potential_outcomes.formula(
  formula,
  conditions = c(0, 1),
  assignment_variables = "Z",
  data,
  level = NULL,
  label = outcome_variable
)

potential_outcomes.NULL(
  formula = stop("Not provided"),
  ...,
  data,
  level = NULL
)

Arguments

...

arguments to be captured, and later passed to the handler

handler

a tidy-in, tidy-out function (i.e., one that takes a data.frame and returns a data.frame)

label

a string describing the step

formula

a formula to calculate potential outcomes as functions of assignment variables.

conditions

see expand_conditions. Provide values (e.g. conditions = 1:4) for a single assignment variable. If there are multiple assignment variables, provide a named list (e.g. conditions = list(Z1 = 0:1, Z2 = 0:1)). Defaults to 0:1 if no conditions are provided.

assignment_variables

The name(s) of the assignment variable(s). Generally not required, as names are taken from conditions.

data

a data.frame

level

a character string specifying the level of the hierarchy at which fabricate should calculate the potential outcomes

Value

a function that returns a data.frame

Details

A declare_potential_outcomes function is used to create outcomes that each unit would express in each possible treatment condition.

Examples

# Potential outcomes can be declared in two ways:
# by using a formula or as separate variables.
# Using a formula
declare_population(N = 100, U = rnorm(N)) +
  declare_potential_outcomes(Y ~ 0.5*Z + U)
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, U = rnorm(N)) -----------------
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: U
#>    min median mean  max   sd N_missing N_unique
#>  -2.03   0.19 0.13 2.69 0.93         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + U) -------
#>
#> Formula: Y ~ 0.5 * Z + U
#>
#> Added variable: Y_Z_0
#>    min median mean  max   sd N_missing N_unique
#>  -2.03   0.19 0.13 2.69 0.93         0      100
#>
#> Added variable: Y_Z_1
#>    min median mean  max   sd N_missing N_unique
#>  -1.53   0.69 0.63 3.19 0.93         0      100
#>
# As separate variables
declare_population(N = 100, U = rnorm(N)) +
  declare_potential_outcomes(Y_Z_0 = U, Y_Z_1 = U + 0.5)
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, U = rnorm(N)) -----------------
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: U
#>    min median mean max   sd N_missing N_unique
#>  -3.07   0.09 0.08 2.8 1.05         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y_Z_0 = U, Y_Z_1 = U + 0.5)
#>
#> Added variable: Y_Z_0
#>    min median mean max   sd N_missing N_unique
#>  -3.07   0.09 0.08 2.8 1.05         0      100
#>
#> Added variable: Y_Z_1
#>    min median mean max   sd N_missing N_unique
#>  -2.57   0.59 0.58 3.3 1.05         0      100
#>
# (notice the naming structure: outcome_assignment_condition: Y_Z_1)
# You can change the name of the outcome
declare_population(N = 100, U = rnorm(N)) +
  declare_potential_outcomes(Y2 ~ 0.5*Z + U)
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, U = rnorm(N)) -----------------
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: U
#>    min median  mean  max   sd N_missing N_unique
#>  -1.85  -0.15 -0.06 1.95 0.85         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y2 ~ 0.5 * Z + U) ------
#>
#> Formula: Y2 ~ 0.5 * Z + U
#>
#> Added variable: Y2_Z_0
#>    min median  mean  max   sd N_missing N_unique
#>  -1.85  -0.15 -0.06 1.95 0.85         0      100
#>
#> Added variable: Y2_Z_1
#>    min median mean  max   sd N_missing N_unique
#>  -1.35   0.35 0.44 2.45 0.85         0      100
#>
# You can change the name of the assignment_variable
declare_population(N = 100, U = rnorm(N)) +
  declare_potential_outcomes(Y ~ 0.5*D + U, assignment_variable = "D")
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, U = rnorm(N)) -----------------
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: U
#>    min median mean max   sd N_missing N_unique
#>  -2.03   0.12  0.1 2.1 0.98         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * D + U, assignment_variable = "D")
#>
#> Formula: Y ~ 0.5 * D + U
#>
#> Added variable: Y_D_0
#>    min median mean max   sd N_missing N_unique
#>  -2.03   0.12  0.1 2.1 0.98         0      100
#>
#> Added variable: Y_D_1
#>    min median mean max   sd N_missing N_unique
#>  -1.53   0.62  0.6 2.6 0.98         0      100
#>
# `conditions` defines the "range" of the potential outcomes function
declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) +
  declare_potential_outcomes(formula = Y ~ .05 + .25 * Z + .01 * age * Z,
                             conditions = 1:4)
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, age = sample(18:65, N, replace = TRUE))
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: age
#>  min median  mean max    sd N_missing N_unique
#>   18     39 40.11  65 14.83         0       44
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(formula = Y ~ 0.05 + 0.25 * Z + 0.01 * age * Z, conditions = 1:4)
#>
#> Formula: Y ~ 0.05 + 0.25 * Z + 0.01 * age * Z
#>
#> Added variable: Y_Z_1
#>   min median mean  max   sd N_missing N_unique
#>  0.48   0.69  0.7 0.95 0.15         0       44
#>
#> Added variable: Y_Z_2
#>   min median mean  max  sd N_missing N_unique
#>  0.91   1.33 1.35 1.85 0.3         0       44
#>
#> Added variable: Y_Z_3
#>   min median mean  max   sd N_missing N_unique
#>  1.34   1.97    2 2.75 0.44         0       44
#>
#> Added variable: Y_Z_4
#>   min median mean  max   sd N_missing N_unique
#>  1.77   2.61 2.65 3.65 0.59         0       44
#>
# Multiple assignment variables can be specified in `conditions`. For example,
# in a 2x2 factorial potential outcome:
declare_population(N = 100, age = sample(18:65, N, replace = TRUE)) +
  declare_potential_outcomes(formula = Y ~ .05 + .25 * Z1 + .01 * age * Z2,
                             conditions = list(Z1 = 0:1, Z2 = 0:1))
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, age = sample(18:65, N, replace = TRUE))
#>
#> N = 100
#>
#> Added variable: ID
#>  N_missing N_unique     class
#>          0      100 character
#>
#> Added variable: age
#>  min median  mean max    sd N_missing N_unique
#>   18     46 43.02  65 14.29         0       39
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(formula = Y ~ 0.05 + 0.25 * Z1 + 0.01 * age * Z2, conditions = list(Z1 = 0:1, Z2 = 0:1))
#>
#> Formula: Y ~ 0.05 + 0.25 * Z1 + 0.01 * age * Z2
#>
#> Added variable: Y_Z1_0_Z2_0
#>  0.05
#>   100
#>  1.00
#>
#> Added variable: Y_Z1_1_Z2_0
#>   0.3
#>   100
#>  1.00
#>
#> Added variable: Y_Z1_0_Z2_1
#>   min median mean max   sd N_missing N_unique
#>  0.23   0.51 0.48 0.7 0.14         0       39
#>
#> Added variable: Y_Z1_1_Z2_1
#>   min median mean  max   sd N_missing N_unique
#>  0.48   0.76 0.73 0.95 0.14         0       39
#>