Skip to contents

Declare sampling procedure

Usage

declare_sampling(..., handler = sampling_handler, label = NULL)

sampling_handler(data, ..., legacy = FALSE)

Arguments

...

Arguments to be captured and later passed to the handler.

handler

A tidy-in, tidy-out function, i.e. one that takes a data.frame and returns a data.frame.

label

a string describing the step

data

A data.frame.

legacy

Use the legacy randomizr functionality. This will be disabled in a future release; please use legacy = FALSE.

Value

A sampling declaration, which is a function that takes a data.frame as an argument and returns a data.frame subsetted to sampled observations and (optionally) augmented with inclusion probabilities and other quantities.

Examples

 
# declare_sampling in use
## Two-arm randomized experiment
## Steps are chained with `+` in the order:
## model -> inquiry -> sampling -> assignment -> measurement -> estimator.
design <-
  # Population of 500 units. X is a binary covariate (first half 0, second
  # half 1), U is normal noise, and the potential outcomes of Y carry a
  # treatment effect of 0.2 for Z.
  declare_model(
    N = 500,
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ 0.2 * Z + X + U)
  ) +
  # Inquiry: the average treatment effect.
  # NOTE(review): this step precedes sampling, so the estimand is presumably
  # computed on all 500 units -- confirm.
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  # Sampling: complete random sampling of exactly 200 of the N units.
  declare_sampling(S = complete_rs(N = N, n = 200)) +
  # Assignment: exactly 100 of the units still in the data are assigned Z = 1
  # (N here is the row count after sampling; the output's df = 198 is
  # consistent with 200 rows).
  declare_assignment(Z = complete_ra(N = N, m = 100)) +
  # Measurement: reveal the observed Y from the potential outcomes given Z.
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  # Estimator of the effect of Z on Y, linked to the ATE inquiry by name.
  declare_estimator(Y ~ Z, inquiry = "ATE")
  
# Run the full design once and print the estimand next to the estimate.
run_design(design)
#>   inquiry estimand estimator term  estimate  std.error statistic      p.value
#> 1     ATE      0.2 estimator    Z 0.3308988 0.07461297   4.43487 1.526199e-05
#>    conf.low conf.high  df outcome
#> 1 0.1837607 0.4780369 198       Y

# Set up population to sample from
# Three nested levels: villages > households > individuals. At each lower
# level, N is taken from a variable created at the level above, so group
# sizes vary across villages and households.
model <- declare_model(
  # 30 villages, each with between 50 and 100 households.
  villages = add_level(
    N = 30, 
    N_households = sample(c(50:100), N, replace = TRUE)
  ),
  # Within each village, N_households households, each with 1 to 4 members
  # (drawn with probabilities 0.2, 0.3, 0.25, 0.25).
  households = add_level(
    N = N_households, 
    N_members = sample(c(1, 2, 3, 4), N, 
                       prob = c(0.2, 0.3, 0.25, 0.25), replace = TRUE)
  ),
  # Within each household, N_members individuals with an age in 18..90 and a
  # binary gender indicator (0/1 with probability 0.5).
  individuals = add_level(
    N = N_members, 
    age = sample(18:90, N, replace = TRUE),
    gender = rbinom(n = N, size = 1, prob = .5)
  )
)

# Sampling procedures
## Complete random sampling
## Draws exactly 1000 individuals from the whole population, ignoring the
## village/household structure.
design <- model +
  declare_sampling(S = complete_rs(N = N, n = 1000))
  
# Only sampled rows are returned (S is 1 in every printed row).
head(draw_data(design))
#>    villages N_households households N_members individuals age gender S
#> 2        01           69       0001         3        0002  39      1 1
#> 12       01           69       0005         3        0012  67      0 1
#> 41       01           69       0015         2        0041  45      1 1
#> 52       01           69       0020         2        0052  65      0 1
#> 56       01           69       0022         2        0056  89      1 1
#> 70       01           69       0028         3        0070  77      1 1

## Cluster random sampling
## Villages are the clusters: 15 of the 30 villages are sampled, and every
## individual in a sampled village is kept.
design <- model +
  declare_sampling(S = cluster_rs(clusters = villages, 
                                  n = 15))

# Only sampled rows are returned (S is 1 in every printed row).
head(draw_data(design))
#>     villages N_households households N_members individuals age gender S
#> 217       02           54       0085         4        0217  72      1 1
#> 218       02           54       0085         4        0218  38      0 1
#> 219       02           54       0085         4        0219  25      1 1
#> 220       02           54       0085         4        0220  71      1 1
#> 221       02           54       0086         2        0221  34      1 1
#> 222       02           54       0086         2        0222  34      1 1

## Strata and cluster random sampling
## Villages are strata and households are clusters: within each village,
## whole households are sampled. strata_n has one entry per village
## (30 villages), requesting 20 households from each; every village has at
## least 50 households, so 20 is always feasible.
design <- model +
  declare_sampling(S  = strata_and_cluster_rs(
    strata = villages,
    clusters = households,
    strata_n = rep(20, 30)))
    
# Only sampled rows are returned (S is 1 in every printed row).
head(draw_data(design))
#>    villages N_households households N_members individuals age gender S
#> 11       01           60       0004         4        0011  59      1 1
#> 12       01           60       0004         4        0012  77      1 1
#> 13       01           60       0004         4        0013  44      0 1
#> 14       01           60       0004         4        0014  61      1 1
#> 23       01           60       0007         2        0023  27      0 1
#> 24       01           60       0007         2        0024  84      0 1

## Stratified random sampling
## Gender is the stratum. With strata supplied, the scalar n is the number of
## units sampled within each stratum, so 100 individuals are drawn per gender
## value.
design <- model +
  declare_sampling(S = strata_rs(strata = gender, n = 100))

# Only sampled rows are returned (S is 1 in every printed row).
head(draw_data(design))
#>     villages N_households households N_members individuals age gender S
#> 49        01           79       0019         2        0049  32      0 1
#> 72        01           79       0027         2        0072  53      1 1
#> 125       01           79       0050         4        0125  62      0 1
#> 243       02           56       0097         3        0243  58      0 1
#> 283       02           56       0115         2        0283  69      1 1
#> 311       02           56       0128         3        0311  59      0 1