Randomization Method
Randomization was done in R, in the office, using the following code and seeds.
# Randomize government schools
here::i_am("code/randomize_baseline.R")
pacman::p_load(tidyverse, randomizr)
# Load the cleaned baseline data for government schools: the schools file and
# the students file (per the file names).
gov_schools_orf_df = here::here("data", "clean_baseline_gov_schools.dta") |>
  haven::read_dta()
orf_df = here::here("data", "clean_baseline_gov_students.dta") |>
  haven::read_dta()
# Fix the RNG state so the government-school assignment is reproducible.
set.seed(612873201)
# Build stratification cells from schools ranked by letter_sound_knowledge_eb,
# then assign treatment within each cell.
# arrange() places missing values last, so schools with no score land in the
# final cell (cell 16, the only cell of three); the original note says three
# schools ran no assessments.
# NOTE(review): an earlier note here counted 62 assessed schools, with the two
# extras added to cells roughly in the middle; the inline "(was 5)" remarks
# suggest cells 7 and 8 once held 5 schools each. The vector below now covers
# 63 schools (15 cells of 4 plus 1 cell of 3) -- confirm against the number of
# rows in the data.
gov_schools_orf_df = gov_schools_orf_df |>
  arrange(letter_sound_knowledge_eb) |>
  mutate(
    stratification_cell = c(
      rep(1:6, each = 4), # Schools 1-24: cells 1-6, 4 schools each
      rep(7, 4), # Schools 25-28: cell 7, 4 schools (was 5)
      rep(8, 4), # Schools 29-32: cell 8, 4 schools (was 5)
      rep(9:15, each = 4), # Schools 33-60: cells 9-15, 4 schools each
      rep(16, 3) # Schools 61-63: cell 16, 3 schools
    ),
    # Block-randomize treatment within each stratification cell
    treat = block_ra(blocks = stratification_cell)
  ) |>
  # Put the assignment columns right after the school identifier
  relocate(treat, stratification_cell, .after = school)
# Save the school-level data with the assignment attached
gov_schools_orf_df |>
  haven::write_dta(
    here::here("data", "clean_baseline_gov_schools_with_treatment.dta")
  )
# Merge with student level
# Each student row picks up its school's treatment status and stratification
# cell through the `school` key.
tmp_schools = gov_schools_orf_df |> select(school, treat, stratification_cell)
# Guard: a repeated school key would silently duplicate student rows in the
# join below, so stop early instead of writing a corrupted file.
stopifnot(
  "`school` must uniquely identify rows of gov_schools_orf_df" =
    anyDuplicated(tmp_schools$school) == 0
)
orf_df = orf_df |>
  left_join(tmp_schools, by = join_by(school)) |>
  # Put the assignment columns right after the student name
  relocate(treat, stratification_cell, .after = student_name)
# Save the student-level data with the assignment attached
haven::write_dta(
  orf_df,
  here::here("data", "clean_baseline_gov_students_with_treatment.dta")
)
#### Randomize private schools
# Load the cleaned baseline data for private schools.
private_schools_df = here::here(
  "data",
  "clean_baseline_private_schools.dta"
) |>
  haven::read_dta()
# NOTE(review): the file name below is spelled "basleine" -- confirm it matches
# the file on disk. private_students_df is not used in the code visible here.
private_students_df = here::here(
  "data",
  "clean_basleine_private_students.dta"
) |>
  haven::read_dta()
# Fix the RNG state so the private-school assignment is reproducible.
set.seed(12398173)
# First randomize the 24 private schools that have an observation for the
# beginning of the year assessments.
# Stratification cells: 4 schools per cell, ranked by letter_sound_knowledge_eb.
private_schools = private_schools_df |>
  filter(!is.na(letter_sound_knowledge_eb)) |>
  arrange(letter_sound_knowledge_eb) |>
  mutate(
    # Cell ids continue after the 16 government-school cells: 17-22
    stratification_cell = rep(1:6, each = 4) + 16,
    # Block-randomize treatment within each stratification cell
    treat = block_ra(blocks = stratification_cell)
  ) |>
  relocate(treat, stratification_cell, .after = school)
# Now randomize the 16 schools that didn't have an assessment.
# No seed is reset here: these draws continue the RNG stream seeded before the
# first private batch, so the two batches must run in this order.
private_schools_egra = private_schools_df |>
  filter(is.na(letter_sound_knowledge_eb)) |>
  # Every remaining score is NA, so this sort carries no ranking information
  arrange(letter_sound_knowledge_eb) |>
  mutate(
    # Cell ids continue after the 16 government cells and the 6 cells from
    # the first batch of private schools: 23-26
    stratification_cell = rep(1:4, each = 4) + 22,
    # Block-randomize treatment within each stratification cell
    treat = block_ra(blocks = stratification_cell)
  ) |>
  relocate(treat, stratification_cell, .after = school)
# Combine both private batches (assessed schools first) and save the result
private_schools_combined_df = bind_rows(private_schools, private_schools_egra)
private_schools_combined_df |>
  haven::write_dta(
    here::here("data", "clean_baseline_private_schools_with_treatment.dta")
  )
# Write treated schools list for Simon
# One workbook per sector, holding only the `school` column of the schools
# with treat == 1.
walk2(
  list(gov_schools_orf_df, private_schools_combined_df),
  c("treated_gov_schools.xlsx", "treated_private_schools.xlsx"),
  function(schools_df, file_name) {
    schools_df |>
      filter(treat == 1) |>
      select(school) |>
      writexl::write_xlsx(here::here("data", file_name))
  }
)
# Stack government and private schools into one data set and rename the
# assignment column from treat to treat_parent.
all_schools_df = bind_rows(gov_schools_orf_df, private_schools_combined_df) |>
  rename(treat_parent = treat)
### Now cross-randomize school leader training
# This reuses the same stratification cells: within each cell, schools are
# blocked on treat_parent. So basically within strat cell 1, the two treat
# schools are split and the two control schools are split.
# NOTE(review): in the three-school cell one treat_parent arm holds a single
# school -- confirm how block_ra() assigns a one-unit block.
set.seed(312323814)
cross_randomized_df = map(
  # Cells are visited in ascending id order; the order matters because every
  # block_ra() call draws from the same seeded RNG stream.
  seq_len(max(all_schools_df$stratification_cell)),
  function(cell_id) {
    all_schools_df |>
      # .env$ makes explicit that cell_id is the function argument, not a
      # column of the data
      filter(stratification_cell == .env$cell_id) |>
      mutate(treat_school_leader = block_ra(blocks = treat_parent)) |>
      relocate(treat_school_leader, .after = treat_parent)
  }
) |>
  bind_rows()
# Write the full treatment list to a Stata .dta file
haven::write_dta(
  cross_randomized_df,
  here::here("data", "full_treatment_list.dta")
)