Skip to contents

Declare sampling procedure

Usage

declare_sampling(..., handler = sampling_handler, label = NULL)

sampling_handler(data, ..., legacy = FALSE)

Arguments

...

Arguments to be captured and later passed to the handler.

handler

A tidy-in, tidy-out function, i.e. one that takes a data.frame and returns a data.frame.

label

a string describing the step

data

A data.frame.

legacy

Use the legacy randomizr functionality. This option will be disabled in a future release; please use legacy = FALSE.

Value

A sampling declaration: a function that takes a data.frame as an argument and returns a data.frame subset to the sampled observations and (optionally) augmented with inclusion probabilities and other quantities.

Examples

 
# declare_sampling in use
## Two-arm randomized experiment
design <-
  declare_model(
    N = 500,
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ 0.2 * Z + X + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_sampling(S = complete_rs(N = N, n = 200)) +
  declare_assignment(Z = complete_ra(N = N, m = 100)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE")
  
run_design(design)
#>   inquiry estimand estimator term  estimate  std.error statistic    p.value
#> 1     ATE      0.2 estimator    Z 0.1584226 0.07939867   1.99528 0.04738339
#>      conf.low conf.high  df outcome
#> 1 0.001847013 0.3149981 198       Y

# Set up population to sample from
model <- declare_model(
  villages = add_level(
    N = 30, 
    N_households = sample(c(50:100), N, replace = TRUE)
  ),
  households = add_level(
    N = N_households, 
    N_members = sample(c(1, 2, 3, 4), N, 
                       prob = c(0.2, 0.3, 0.25, 0.25), replace = TRUE)
  ),
  individuals = add_level(
    N = N_members, 
    age = sample(18:90, N, replace = TRUE),
    gender = rbinom(n = N, size = 1, prob = .5)
  )
)

# Sampling procedures
## Complete random sampling
design <- model +
  declare_sampling(S = complete_rs(N = N, n = 1000))
  
head(draw_data(design))
#>    villages N_households households N_members individuals age gender S
#> 6        01           86       0002         4        0006  49      0 1
#> 13       01           86       0004         4        0013  48      1 1
#> 16       01           86       0005         3        0016  33      0 1
#> 18       01           86       0006         4        0018  85      0 1
#> 21       01           86       0006         4        0021  42      0 1
#> 29       01           86       0009         4        0029  84      1 1

## Cluster random sampling
design <- model +
  declare_sampling(S = cluster_rs(clusters = villages, 
                                  n = 15))

head(draw_data(design))
#>     villages N_households households N_members individuals age gender S
#> 473       03           88       0184         2        0473  45      0 1
#> 474       03           88       0184         2        0474  70      1 1
#> 475       03           88       0185         2        0475  29      1 1
#> 476       03           88       0185         2        0476  86      1 1
#> 477       03           88       0186         3        0477  40      1 1
#> 478       03           88       0186         3        0478  77      1 1

## Strata and cluster random sampling
design <- model +
  declare_sampling(S  = strata_and_cluster_rs(
    strata = villages,
    clusters = households,
    strata_n = rep(20, 30)))
    
head(draw_data(design))
#>    villages N_households households N_members individuals age gender S
#> 1        01           84       0001         2        0001  18      0 1
#> 2        01           84       0001         2        0002  31      0 1
#> 3        01           84       0002         1        0003  55      0 1
#> 15       01           84       0008         2        0015  70      1 1
#> 16       01           84       0008         2        0016  50      0 1
#> 26       01           84       0014         4        0026  84      1 1

## Stratified random sampling
design <- model +
  declare_sampling(S = strata_rs(strata = gender, n = 100))

head(draw_data(design))
#>    villages N_households households N_members individuals age gender S
#> 23       01           80       0009         1        0023  57      1 1
#> 24       01           80       0010         3        0024  80      0 1
#> 52       01           80       0020         3        0052  71      1 1
#> 58       01           80       0023         1        0058  46      1 1
#> 67       01           80       0028         3        0067  77      0 1
#> 79       01           80       0031         4        0079  35      0 1