Declare sampling procedure
Usage
declare_sampling(..., handler = sampling_handler, label = NULL)
sampling_handler(data, ..., legacy = FALSE)
Arguments
- ...
arguments to be captured, and later passed to the handler
- handler
a tidy-in, tidy-out function, i.e. one that takes a data.frame and returns a data.frame
- label
a string describing the step
- data
A data.frame.
- legacy
Use the legacy randomizr functionality. The legacy behavior will be disabled in a future release; please use legacy = FALSE.
Value
A sampling declaration, which is a function that takes a data.frame as an argument and returns a data.frame subset to the sampled observations and (optionally) augmented with inclusion probabilities and other quantities.
Examples
# declare_sampling in use
## Two-arm randomized experiment
# Simulate 500 units whose potential outcomes differ by 0.2 between
# treatment conditions, sample 200 of them, assign 100 of the sampled units
# to treatment, and estimate the ATE from the revealed outcomes.
design <-
  declare_model(
    N = 500,
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ 0.2 * Z + X + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_sampling(S = complete_rs(N = N, n = 200)) +
  declare_assignment(Z = complete_ra(N = N, m = 100)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE")

# Run the design once; the result pairs the estimand with the estimate.
run_design(design)
#> inquiry estimand estimator term estimate std.error statistic p.value
#> 1 ATE 0.2 estimator Z 0.3308988 0.07461297 4.43487 1.526199e-05
#> conf.low conf.high df outcome
#> 1 0.1837607 0.4780369 198 Y
# Set up population to sample from
# Three nested levels: 30 villages, 50 to 100 households per village, and
# 1 to 4 members per household. Each individual gets an age drawn from
# 18 to 90 and a binary gender indicator.
model <- declare_model(
  villages = add_level(
    N = 30,
    N_households = sample(50:100, N, replace = TRUE)
  ),
  households = add_level(
    N = N_households,
    N_members = sample(
      c(1, 2, 3, 4), N,
      prob = c(0.2, 0.3, 0.25, 0.25), replace = TRUE
    )
  ),
  individuals = add_level(
    N = N_members,
    age = sample(18:90, N, replace = TRUE),
    gender = rbinom(n = N, size = 1, prob = 0.5)
  )
)
# Sampling procedures
## Complete random sampling
# Sample exactly 1000 of the individual-level rows; S marks the sampled rows.
design <- model + declare_sampling(S = complete_rs(N = N, n = 1000))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 2 01 69 0001 3 0002 39 1 1
#> 12 01 69 0005 3 0012 67 0 1
#> 41 01 69 0015 2 0041 45 1 1
#> 52 01 69 0020 2 0052 65 0 1
#> 56 01 69 0022 2 0056 89 1 1
#> 70 01 69 0028 3 0070 77 1 1
## Cluster random sampling
# Sample 15 villages as whole clusters: every individual in a sampled
# village is retained.
design <- model +
  declare_sampling(S = cluster_rs(clusters = villages, n = 15))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 217 02 54 0085 4 0217 72 1 1
#> 218 02 54 0085 4 0218 38 0 1
#> 219 02 54 0085 4 0219 25 1 1
#> 220 02 54 0085 4 0220 71 1 1
#> 221 02 54 0086 2 0221 34 1 1
#> 222 02 54 0086 2 0222 34 1 1
## Stratified and clustered random sampling
# Villages are the strata and households are the clusters: sample 20
# households within each of the 30 villages.
design <- model +
  declare_sampling(
    S = strata_and_cluster_rs(
      strata = villages,
      clusters = households,
      strata_n = rep(20, 30)
    )
  )
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 11 01 60 0004 4 0011 59 1 1
#> 12 01 60 0004 4 0012 77 1 1
#> 13 01 60 0004 4 0013 44 0 1
#> 14 01 60 0004 4 0014 61 1 1
#> 23 01 60 0007 2 0023 27 0 1
#> 24 01 60 0007 2 0024 84 0 1
## Stratified random sampling
# Stratify on gender and sample 100 individuals within each stratum.
design <- model +
  declare_sampling(S = strata_rs(strata = gender, n = 100))
head(draw_data(design))
#> villages N_households households N_members individuals age gender S
#> 49 01 79 0019 2 0049 32 0 1
#> 72 01 79 0027 2 0072 53 1 1
#> 125 01 79 0050 4 0125 62 0 1
#> 243 02 56 0097 3 0243 58 0 1
#> 283 02 56 0115 2 0283 69 1 1
#> 311 02 56 0128 3 0311 59 0 1