Declare sampling procedure
Usage
declare_sampling(..., handler = sampling_handler, label = NULL)
sampling_handler(data, ..., legacy = FALSE)
Arguments
- ...
Arguments to be captured and later passed to the handler, typically named expressions such as S = complete_rs(N = N, n = 200).
- handler
A tidy-in, tidy-out function, i.e. one that takes a data.frame as input and returns a data.frame. Defaults to sampling_handler.
- label
a string describing the step
- data
A data.frame.
- legacy
Use the legacy randomizr functionality. This option will be disabled in a future release; please use legacy = FALSE (the default).
Value
A sampling declaration: a function that takes a data.frame as an argument and returns a data.frame subset to the sampled observations. The returned data.frame is optionally augmented with inclusion probabilities and other quantities.
Examples
# declare_sampling in use
## Two-arm randomized experiment
design <-
declare_model(
N = 500,
X = rep(c(0, 1), each = N / 2),
U = rnorm(N, sd = 0.25),
potential_outcomes(Y ~ 0.2 * Z + X + U)
) +
declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
declare_sampling(S = complete_rs(N = N, n = 200)) +
declare_assignment(Z = complete_ra(N = N, m = 100)) +
declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
declare_estimator(Y ~ Z, inquiry = "ATE")
run_design(design)
#> inquiry estimand estimator term estimate std.error statistic p.value
#> 1 ATE 0.2 estimator Z 0.1584226 0.07939867 1.99528 0.04738339
#> conf.low conf.high df outcome
#> 1 0.001847013 0.3149981 198 Y
# Set up population to sample from
model <- declare_model(
villages = add_level(
N = 30,
N_households = sample(c(50:100), N, replace = TRUE)
),
households = add_level(
N = N_households,
N_members = sample(c(1, 2, 3, 4), N,
prob = c(0.2, 0.3, 0.25, 0.25), replace = TRUE)
),
individuals = add_level(
N = N_members,
age = sample(18:90, N, replace = TRUE),
gender = rbinom(n = N, size = 1, prob = .5)
)
)
# Sampling procedures
## Complete random sampling
design <- model +
declare_sampling(S = complete_rs(N = N, n = 1000))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 6 01 86 0002 4 0006 49 0 1
#> 13 01 86 0004 4 0013 48 1 1
#> 16 01 86 0005 3 0016 33 0 1
#> 18 01 86 0006 4 0018 85 0 1
#> 21 01 86 0006 4 0021 42 0 1
#> 29 01 86 0009 4 0029 84 1 1
## Cluster random sampling
design <- model +
declare_sampling(S = cluster_rs(clusters = villages,
n = 15))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 473 03 88 0184 2 0473 45 0 1
#> 474 03 88 0184 2 0474 70 1 1
#> 475 03 88 0185 2 0475 29 1 1
#> 476 03 88 0185 2 0476 86 1 1
#> 477 03 88 0186 3 0477 40 1 1
#> 478 03 88 0186 3 0478 77 1 1
## Strata and cluster random sampling
design <- model +
declare_sampling(S = strata_and_cluster_rs(
strata = villages,
clusters = households,
strata_n = rep(20, 30)))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 1 01 84 0001 2 0001 18 0 1
#> 2 01 84 0001 2 0002 31 0 1
#> 3 01 84 0002 1 0003 55 0 1
#> 15 01 84 0008 2 0015 70 1 1
#> 16 01 84 0008 2 0016 50 0 1
#> 26 01 84 0014 4 0026 84 1 1
## Stratified random sampling
design <- model +
declare_sampling(S = strata_rs(strata = gender, n = 100))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 23 01 80 0009 1 0023 57 1 1
#> 24 01 80 0010 3 0024 80 0 1
#> 52 01 80 0020 3 0052 71 1 1
#> 58 01 80 0023 1 0058 46 1 1
#> 67 01 80 0028 3 0067 77 0 1
#> 79 01 80 0031 4 0079 35 0 1