Declares estimands. Estimands are the subjects of inquiry and can be estimated by an estimator.

declare_estimand(..., handler = estimand_handler, label = "estimand")

declare_estimands(..., handler = estimand_handler, label = "estimand")

estimand_handler(data, ..., subset = NULL, term = FALSE, label)

## Arguments

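- `...`: arguments to be captured, and later passed to the handler
- `handler`: a tidy-in, tidy-out function
- `label`: a string describing the step
- `data`: a data.frame
- `subset`: a subset expression
- `term`: TRUE/FALSE; if TRUE, the names of `...` are returned in a `term` column (see Details)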

## Value

an estimand declaration, which is a function that accepts a data.frame as an argument and returns a data.frame containing the value of the estimand.

## Details

For the default diagnosands, the return value of the handler should have estimand_label and estimand columns.

If `term` is TRUE, the names of the `...` arguments will be returned in a `term` column, and `estimand_label` will contain the step label. The `term` column can then serve as an additional dimension in diagnosis.

## Examples

# Set up a design for use in examples:
design <-
  declare_population(N = 100, X = rnorm(N)) +
  declare_potential_outcomes(Y ~ (.25 + X) * Z + rnorm(N)) +
  declare_assignment(m = 50)

design + declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, X = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: X #> min median mean max sd N_missing N_unique #> -2.45 -0.13 -0.14 1.85 0.94 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ (0.25 + X) * Z + rnorm(N)) #> #> Formula: Y ~ (0.25 + X) * Z + rnorm(N) #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.81 0.05 0.02 2.1 0.83 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -2.83 0.02 0.13 3.21 1.33 0 100 #> #> Step 3 (assignment): declare_assignment(m = 50) -------------------------------- #> #> Added variable: Z #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.83 0.15 0.16 3.21 1.19 0 100 #> #> Step 5 (estimand): declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) ----------------- #> #> A single draw of the estimand: #> estimand_label estimand #> ATE 0.1159849 #>
design + declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0), subset = (Z == 1))
#> #> Design Summary #> #> Step 1 (population): declare_population(N = 100, X = rnorm(N)) ----------------- #> #> N = 100 #> #> Added variable: ID #> N_missing N_unique class #> 0 100 character #> #> Added variable: X #> min median mean max sd N_missing N_unique #> -2.67 0.17 0.16 3.12 1.1 0 100 #> #> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ (0.25 + X) * Z + rnorm(N)) #> #> Formula: Y ~ (0.25 + X) * Z + rnorm(N) #> #> Added variable: Y_Z_0 #> min median mean max sd N_missing N_unique #> -2.34 -0.01 -0.09 2.59 1.01 0 100 #> #> Added variable: Y_Z_1 #> min median mean max sd N_missing N_unique #> -3.75 0.63 0.44 3.75 1.48 0 100 #> #> Step 3 (assignment): declare_assignment(m = 50) -------------------------------- #> #> Added variable: Z #> 0 1 #> 50 50 #> 0.50 0.50 #> #> Added variable: Z_cond_prob #> 0.5 #> 100 #> 1.00 #> #> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z", label = "Autogenerated by ") #> #> Added variable: Y #> min median mean max sd N_missing N_unique #> -2.34 0.04 0.05 2.59 1.14 0 100 #> #> Step 5 (estimand): declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0), subset = (Z == 1)) #> #> A single draw of the estimand: #> estimand_label estimand #> ATT 0.2802348 #>
# Add estimands to a design along with estimators that reference them
design_1 <- design +
  declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_estimator(Y ~ Z, estimand = "ATE")

run_design(design_1)
#> estimand_label estimand estimator_label term estimate std.error statistic #> 1 ATE 0.5302734 estimator Z 0.1812491 0.2493754 0.7268124 #> p.value conf.low conf.high df outcome #> 1 0.4693531 -0.3146307 0.677129 84.35574 Y
# Two estimands, one estimator
design_2 <- design +
  declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0), subset = (Z == 1)) +
  declare_estimator(Y ~ Z, estimand = c("ATE", "ATT"))

run_design(design_2)
#> estimand_label estimand estimator_label term estimate std.error statistic #> 1 ATE 0.4904039 estimator Z 0.2133933 0.2372792 0.8993341 #> 2 ATT 0.4277441 estimator Z 0.2133933 0.2372792 0.8993341 #> p.value conf.low conf.high df outcome #> 1 0.3709524 -0.258212 0.6849986 87.16732 Y #> 2 0.3709524 -0.258212 0.6849986 87.16732 Y
# Two estimands, two coefficients from one estimator
design_3 <- design +
  declare_estimand(intercept = mean(Y_Z_0), slope = mean(Y_Z_1 - Y_Z_0)) +
  declare_estimator(
    Y ~ Z,
    model = lm_robust,
    term = TRUE,
    estimand = c("intercept", "slope")
  )

run_design(design_3)
#> estimand_label estimand estimator_label term estimate std.error #> 1 intercept -0.04009129 estimator (Intercept) -0.003307604 0.1582487 #> 2 slope 0.44873102 estimator Z 0.447397643 0.2700112 #> statistic p.value conf.low conf.high df outcome #> 1 -0.02090131 0.9833669 -0.31734696 0.3107317 98 Y #> 2 1.65695952 0.1007259 -0.08843081 0.9832261 98 Y