Declares estimands. Estimands are the subjects of inquiry and can be estimated by an estimator.

declare_estimand(..., handler = estimand_handler, label = "estimand")

declare_estimands(..., handler = estimand_handler, label = "estimand")

estimand_handler(data, ..., subset = NULL, term = FALSE, label)

## Arguments

- `...`: arguments to be captured, and later passed to the handler
- `handler`: a tidy-in, tidy-out function
- `label`: a string describing the step
- `data`: a data.frame
- `subset`: a subset expression
- `term`: TRUE/FALSE

## Value

An estimand declaration, which is a function that accepts a `data.frame` as an argument and returns a `data.frame` containing the value of the estimand.

## Details

For the default diagnosands, the return value of the handler should have estimand_label and estimand columns.

If term is TRUE, the names of ... will be returned in a term column, and estimand_label will contain the step label. This can be used as an additional dimension for use in diagnosis.

## Examples


# Set up a design for use in examples:

design <-
declare_population(N = 100, X = rnorm(N)) +
declare_potential_outcomes(Y ~ (.25 + X) * Z + rnorm(N)) +
declare_assignment(m = 50)

design + declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, X = rnorm(N)) -----------------
#>
#> N = 100
#>
#>  N_missing N_unique     class
#>          0      100 character
#>
#>    min median  mean  max   sd N_missing N_unique
#>  -2.45  -0.13 -0.14 1.85 0.94         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ (0.25 + X) * Z + rnorm(N))
#>
#> Formula: Y ~ (0.25 + X) * Z + rnorm(N)
#>
#>    min median mean max   sd N_missing N_unique
#>  -2.81   0.05 0.02 2.1 0.83         0      100
#>
#>    min median mean  max   sd N_missing N_unique
#>  -2.83   0.02 0.13 3.21 1.33         0      100
#>
#> Step 3 (assignment): declare_assignment(m = 50) --------------------------------
#>
#>     0    1
#>    50   50
#>  0.50 0.50
#>
#>   0.5
#>   100
#>  1.00
#>
#> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z",     label = "Autogenerated by ")
#>
#>    min median mean  max   sd N_missing N_unique
#>  -2.83   0.15 0.16 3.21 1.19         0      100
#>
#> Step 5 (estimand): declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) -----------------
#>
#> A single draw of the estimand:
#>  estimand_label  estimand
#>             ATE 0.1159849
#>

design + declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0),
subset = (Z == 1))
#>
#> Design Summary
#>
#> Step 1 (population): declare_population(N = 100, X = rnorm(N)) -----------------
#>
#> N = 100
#>
#>  N_missing N_unique     class
#>          0      100 character
#>
#>    min median mean  max  sd N_missing N_unique
#>  -2.67   0.17 0.16 3.12 1.1         0      100
#>
#> Step 2 (potential outcomes): declare_potential_outcomes(Y ~ (0.25 + X) * Z + rnorm(N))
#>
#> Formula: Y ~ (0.25 + X) * Z + rnorm(N)
#>
#>    min median  mean  max   sd N_missing N_unique
#>  -2.34  -0.01 -0.09 2.59 1.01         0      100
#>
#>    min median mean  max   sd N_missing N_unique
#>  -3.75   0.63 0.44 3.75 1.48         0      100
#>
#> Step 3 (assignment): declare_assignment(m = 50) --------------------------------
#>
#>     0    1
#>    50   50
#>  0.50 0.50
#>
#>   0.5
#>   100
#>  1.00
#>
#> Step 4 (reveal): reveal_outcomes(outcome_variables = "Y", assignment_variables = "Z",     label = "Autogenerated by ")
#>
#>    min median mean  max   sd N_missing N_unique
#>  -2.34   0.04 0.05 2.59 1.14         0      100
#>
#> Step 5 (estimand): declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0), subset = (Z == 1))
#>
#> A single draw of the estimand:
#>  estimand_label  estimand
#>             ATT 0.2802348
#>
# Add estimands to a design along with estimators that reference them

design_1 <-
design +
declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
declare_estimator(Y ~ Z, estimand = "ATE")

run_design(design_1)
#>   estimand_label  estimand estimator_label term  estimate std.error statistic
#> 1            ATE 0.5302734       estimator    Z 0.1812491 0.2493754 0.7268124
#>     p.value   conf.low conf.high       df outcome
#> 1 0.4693531 -0.3146307  0.677129 84.35574       Y
# Two estimands, one estimator

design_2 <-
design +
declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
declare_estimand(ATT = mean(Y_Z_1 - Y_Z_0), subset = (Z == 1)) +
declare_estimator(Y ~ Z, estimand = c("ATE", "ATT"))

run_design(design_2)
#>   estimand_label  estimand estimator_label term  estimate std.error statistic
#> 1            ATE 0.4904039       estimator    Z 0.2133933 0.2372792 0.8993341
#> 2            ATT 0.4277441       estimator    Z 0.2133933 0.2372792 0.8993341
#>     p.value  conf.low conf.high       df outcome
#> 1 0.3709524 -0.258212 0.6849986 87.16732       Y
#> 2 0.3709524 -0.258212 0.6849986 87.16732       Y
# Two estimands, two coefficients from one estimator

design_3 <-
design +
declare_estimand(intercept = mean(Y_Z_0),
slope = mean(Y_Z_1 - Y_Z_0)) +
declare_estimator(
Y ~ Z,
model = lm_robust,
term = TRUE,
estimand = c("intercept", "slope")
)

run_design(design_3)
#>   estimand_label    estimand estimator_label        term     estimate std.error
#> 1      intercept -0.04009129       estimator (Intercept) -0.003307604 0.1582487
#> 2          slope  0.44873102       estimator           Z  0.447397643 0.2700112
#>     statistic   p.value    conf.low conf.high df outcome
#> 1 -0.02090131 0.9833669 -0.31734696 0.3107317 98       Y
#> 2  1.65695952 0.1007259 -0.08843081 0.9832261 98       Y