Declare assignment procedure

declare_assignment(..., handler = assignment_handler, label = NULL)

assignment_handler(
  data,
  ...,
  assignment_variable = "Z",
  append_probabilities_matrix = FALSE
)

Arguments

...

arguments to be captured, and later passed to the handler

handler

a tidy-in, tidy-out function

label

a string describing the step

data

A data.frame.

assignment_variable

Name for assignment variable (quoted). Defaults to "Z". Argument to be used with default handler.

append_probabilities_matrix

Should the condition probabilities matrix be appended to the data? Defaults to FALSE. Argument to be used with default handler.

Value

An assignment declaration, which is a function that takes a data.frame as an argument and returns a data.frame with additional columns appended including an assignment variable and (optionally) probabilities of assignment.

Details

declare_assignment can work with any assignment function that takes data and returns data. The default handler is conduct_ra from the randomizr package. This allows quick declaration of many assignment schemes that involve simple or complete random assignment with blocks and clusters. The arguments to conduct_ra can include N, blocks, clusters, m, m_each, prob, prob_each, block_m, block_m_each, block_prob, block_prob_each, num_arms, conditions, and simple. The arguments you need to specify are different for different designs. For details see the help files for complete_ra, block_ra, cluster_ra, or block_and_cluster_ra.

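By default, declare_assignment declares a complete random assignment with probability 0.5, so exactly half of the units are assigned to treatment when N is even. Pass simple = TRUE to declare simple ("Bernoulli", or "coin flip") random assignment instead.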

Custom assignment handlers should augment the data frame with an appropriate column for the assignment(s).

Examples

# let's work with the beginnings of a design
design <-
  declare_population(N = 100, female = rbinom(N, 1, 0.5), U = rnorm(N)) +
  # building in treatment effect heterogeneity for fun
  declare_potential_outcomes(Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U)

# Declare simple (or "Bernoulli", or "coin flip") random assignment
design_with_assignment <- design + declare_assignment(prob = 0.5, simple = TRUE)
head(draw_data(design_with_assignment))
#> ID female U Y_Z_0 Y_Z_1 Z Z_cond_prob Y #> 1 001 0 0.02633195 0.02633195 0.5263319 0 0.5 0.02633195 #> 2 002 1 -0.91035071 -0.71035071 -0.1103507 1 0.5 -0.11035071 #> 3 003 0 -1.10299506 -1.10299506 -0.6029951 0 0.5 -1.10299506 #> 4 004 1 0.16071244 0.36071244 0.9607124 1 0.5 0.96071244 #> 5 005 1 0.01555982 0.21555982 0.8155598 0 0.5 0.21555982 #> 6 006 0 0.29092774 0.29092774 0.7909277 1 0.5 0.79092774
# Declare assignment of m units to treatment
design + declare_assignment(m = 50)
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, female = rbinom(N, 1, 0.5), U = rnorm(N)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: female #> 0 1 #> 59 41 #> 0.59 0.41 #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.43 0.17 0.07 3.03 1.04 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U) #> #> Formula: Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.23 0.22 0.16 3.23 1.04 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -1.71 0.78 0.7 3.83 1.04 0 100 #> #> Step 3 (assignment): declare_assignment(m = 50) -------------------------------- #> #> Added variable: Z #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.23 0.51 0.43 3.83 1.09 0 100 #>
# Declare assignment of exactly half of the units to treatment
design + declare_assignment(prob = 0.5)
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, female = rbinom(N, 1, 0.5), U = rnorm(N)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: female #> 0 1 #> 49 51 #> 0.49 0.51 #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.89 -0.04 -0.02 2.79 1.09 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U) #> #> Formula: Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.69 0.05 0.08 2.99 1.1 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -2.09 0.62 0.64 3.59 1.11 0 100 #> #> Step 3 (assignment): declare_assignment(prob = 0.5) ---------------------------- #> #> Added variable: Z #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.53 0.33 0.35 3.2 1.11 0 100 #>
# Declare blocked assignment
design + declare_assignment(blocks = female)
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, female = rbinom(N, 1, 0.5), U = rnorm(N)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: female #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -3.2 -0.07 -0.09 1.55 0.95 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U) #> #> Formula: Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -3 0.04 0.01 1.75 0.93 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -2.4 0.6 0.56 2.35 0.92 0 100 #> #> Step 3 (assignment): declare_assignment(blocks = female) ----------------------- #> #> Added variable: Z #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.4 0.44 0.28 2.04 0.96 0 100 #>
# Declare assignment specifying assignment probability for each block
design + declare_assignment(block_prob = c(1/3, 2/3), blocks = female)
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, female = rbinom(N, 1, 0.5), U = rnorm(N)) #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: female #> 0 1 #> 46 54 #> 0.46 0.54 #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.47 -0.15 0 2.68 1.11 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U) #> #> Formula: Y ~ 0.5 * Z + 0.2 * female + 0.1 * Z * female + U #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.47 -0.13 0.11 2.88 1.12 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -1.97 0.43 0.66 3.48 1.13 0 100 #> #> Step 3 (assignment): declare_assignment(block_prob = c(1/3, 2/3), blocks = female) #> #> Added variable: Z #> 0 1 #> 48 52 #> 0.48 0.52 #> #> Added variable: Z_cond_prob #> 0.333333333333333 0.666666666666667 #> 34 66 #> 0.34 0.66 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.47 0.34 0.4 3.11 1.17 0 100 #>
# Declare factorial assignment (Approach 1): Use complete random assignment
# to assign Z1 and then use Z1 as a block to assign Z2.
design <-
  declare_population(N = 100, U = rnorm(N)) +
  declare_potential_outcomes(Y ~ Z1 + Z2 + Z1*Z2 + U, conditions = list(Z1 = 0:1, Z2 = 0:1))

design +
  declare_assignment(assignment_variable = "Z1") +
  declare_assignment(blocks = Z1, assignment_variable = "Z2")
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.13 -0.12 -0.09 2 0.92 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ Z1 + Z2 + Z1 * Z2 + U, conditions = list(Z1 = 0:1, Z2 = 0:1)) #> #> Formula: Y ~ Z1 + Z2 + Z1 * Z2 + U #> #> Added variable: Y_Z1_0_Z2_0 #> min median mean max sd N_missing N_unique #> -2.13 -0.12 -0.09 2 0.92 0 100 #> #> Added variable: Y_Z1_1_Z2_0 #> min median mean max sd N_missing N_unique #> -1.13 0.88 0.91 3 0.92 0 100 #> #> Added variable: Y_Z1_0_Z2_1 #> min median mean max sd N_missing N_unique #> -1.13 0.88 0.91 3 0.92 0 100 #> #> Added variable: Y_Z1_1_Z2_1 #> min median mean max sd N_missing N_unique #> 0.87 2.88 2.91 5 0.92 0 100 #> #> Step 3 (assignment): declare_assignment(assignment_variable = "Z1") ------------ #> #> Added variable: Z1 #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z1_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (assignment): declare_assignment(blocks = Z1, assignment_variable = "Z2") #> #> Added variable: Z2 #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z2_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 5 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = c("Z1", "Z2"), label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.12 0.98 1.16 4.92 1.45 0 100 #>
# Declare factorial assignment (Approach 2):
# Assign to four conditions and then split into separate factors.
design +
  declare_assignment(conditions = 1:4) +
  declare_step(fabricate, Z1 = as.numeric(Z %in% 2:3), Z2 = as.numeric(Z %in% 3:4))
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, U = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: U #> min median mean max sd N_missing N_unique #> -2.11 -0.08 -0.03 2.07 0.93 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ Z1 + Z2 + Z1 * Z2 + U, conditions = list(Z1 = 0:1, Z2 = 0:1)) #> #> Formula: Y ~ Z1 + Z2 + Z1 * Z2 + U #> #> Added variable: Y_Z1_0_Z2_0 #> min median mean max sd N_missing N_unique #> -2.11 -0.08 -0.03 2.07 0.93 0 100 #> #> Added variable: Y_Z1_1_Z2_0 #> min median mean max sd N_missing N_unique #> -1.11 0.92 0.97 3.07 0.93 0 100 #> #> Added variable: Y_Z1_0_Z2_1 #> min median mean max sd N_missing N_unique #> -1.11 0.92 0.97 3.07 0.93 0 100 #> #> Added variable: Y_Z1_1_Z2_1 #> min median mean max sd N_missing N_unique #> 0.89 2.92 2.97 5.07 0.93 0 100 #> #> Step 3 (assignment): declare_assignment(conditions = 1:4) ---------------------- #> #> Added variable: Z #> 1 2 3 4 #> 25 25 25 25 #> 0.25 0.25 0.25 0.25 #> #> Added variable: Z_cond_prob #> 0.25 #> 100 #> 1.00 #> #> Step 4 (custom): declare_step(fabricate, Z1 = as.numeric(Z %in% 2:3), Z2 = as.numeric(Z %in% 3:4)) #> #> Added variable: Z1 #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z2 #> 0 1 #> 50 50 #> 0.50 0.50 #>
# Declare clustered assignment
clustered_design <-
  declare_population(
    classrooms = add_level(25, cluster_shock = rnorm(N, sd = 0.5)),
    students = add_level(5, individual_shock = rnorm(N, sd = 1.0))
  ) +
  declare_potential_outcomes(Y ~ 0.5* Z + cluster_shock + individual_shock)

clustered_design + declare_assignment(clusters = classrooms)
#> #> Design Summary #> #> Step 1 (population): declare_population(classrooms = add_level(25, cluster_shock = rnorm(N, sd = 0.5)), students = add_level(5, individual_shock = rnorm(N, sd = 1))) #> #> N = 125 #> #> Added variable: classrooms #> N_missing N_unique class #> 0 25 character #> #> Added variable: cluster_shock #> min median mean max sd N_missing N_unique #> -0.79 -0.3 -0.2 0.58 0.43 0 25 #> #> Added variable: students #> N_missing N_unique class #> 0 125 character #> #> Added variable: individual_shock #> min median mean max sd N_missing N_unique #> -2.56 -0.22 -0.18 2.41 0.96 0 125 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ 0.5 * Z + cluster_shock + individual_shock) #> #> Formula: Y ~ 0.5 * Z + cluster_shock + individual_shock #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -3.07 -0.48 -0.37 2.6 1.05 0 125 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -2.57 0.02 0.13 3.1 1.05 0 125 #> #> Step 3 (assignment): declare_assignment(clusters = classrooms) ----------------- #> #> Added variable: Z #> 0 1 #> 65 60 #> 0.52 0.48 #> #> Added variable: Z_cond_prob #> 0.5 #> 125 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -3.07 -0.22 -0.13 3.1 1.06 0 125 #>
# Declare assignment using custom handler
custom_assignment <- function(data, assignment_variable = "X") {
  data[, assignment_variable] <- rbinom(n = nrow(data), size = 1, prob = 0.5)
  data
}

declare_population(N = 6) +
  declare_assignment(handler = custom_assignment, assignment_variable = "X")
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 6) --------------------------------- #> #> N = 6 #> #> Added variable: ID #> N_missing N_unique class #> 0 6 character #> #> Step 2 (assignment): declare_assignment(assignment_variable = "X", handler = custom_assignment) #> #> Added variable: X #> 0 1 #> 3 3 #> 0.50 0.50 #>