#---------------------------------------------------------------------------#
# Project: Surfeel
# File: Randomise treatment
# Author: Boelmann, Morin, Person, Santiago Wolf
# Date: 29/11/2024
#---------------------------------------------------------------------------#

# This file randomises treatment and control classes within schools, separately
# for kindergarten (III.A) and elementary school (III.B) classes. For small
# schools with few kindergarten classes, we randomise on the school level.

# Input file: Classes_overview.csv.
# Output file: randomisation_classes.csv.

# This will be run on R version 4.3.2, running under macOS 15.1.1.

# Paths
RAND_PATH <- paste0(getwd(), "/")

# Packages
library(dplyr) # version used: dplyr_1.1.4

#---------------------------------------------------------------------------#
# I. Preparation
#---------------------------------------------------------------------------#

# Set seed.
# Seed will be changed to DAX Index value at 2 Dec 2024, 11:00 am CET.
# Note: set.seed() interprets its argument as an integer, so the fractional
# part of the index value does not contribute to the seed.
set.seed(19458.37)
#set.seed(0)

# Read in class level data. The columns used below are: type, district,
# school and class.
classes <- read.csv(paste0(RAND_PATH, "Classes_overview.csv"))

#---------------------------------------------------------------------------#
# II. Function definitions
#---------------------------------------------------------------------------#

# Function to randomise within school
#---------------------------------------------------------------------------#
# Argument: df_classes, a data frame with one row per class and a column
#   `school`.
# Returns:  df_classes (ungrouped) with an added 0/1 column `treat`. Within a
#   school the numbers of 0s and 1s differ by at most one, and the surplus
#   classes of odd-sized schools are split between 0 and 1 as evenly as
#   possible across schools.
rand_classes <- function(df_classes) {

  # Add a new column "treat" to the dataset which is deterministic for now:
  # alternating 1, 0, 1, ... within each school, so that every school with at
  # least two classes has both values. These values are shuffled randomly
  # within schools further below.
  df_classes <- df_classes %>%
    group_by(school) %>%
    mutate(treat = rep(c(1, 0), length.out = n())) %>%
    ungroup()

  # Calculate the number of observations per school.
  school_counts <- df_classes %>%
    group_by(school) %>%
    summarise(count = n())

  # Identify schools with uneven number of observations. Because the pattern
  # above starts with 1, these schools currently hold one more 1 than 0.
  uneven_schools <- school_counts %>%
    filter(count %% 2 != 0) %>%
    pull(school)

  # Number of schools with uneven observations.
  nb_schools_uneven <- length(uneven_schools)

  # Half of these with uneven numbers need to get one 1 replaced by a 0 to
  # ensure overall balance. If the list of uneven-class schools is of uneven
  # length itself, we randomly decide whether to take one more or one less.
  nb_schools_need_change <- floor(nb_schools_uneven / 2)
  if (nb_schools_uneven %% 2 != 0) {
    nb_schools_need_change <- nb_schools_need_change +
      sample(c(0, 1), size = 1)
  }

  # Randomly sample schools where the change will be made.
  # We index with sample.int() rather than calling sample(uneven_schools, ...):
  # if there is exactly one uneven school and its ID is a number k >= 1,
  # sample() would draw from 1:k instead of from that single ID. For every
  # other input this is exactly what sample() does internally, so the random
  # number stream is unchanged.
  picked <- sample.int(
    length(uneven_schools),
    size = nb_schools_need_change,
    replace = FALSE
  )
  schools_need_change <- uneven_schools[picked]

  # Adjust one value of treat if sampled. The first row of each school always
  # holds a 1 at this point, so this turns exactly one 1 into a 0.
  df_classes <- df_classes %>%
    group_by(school) %>%
    mutate(
      treat = ifelse(
        school %in% schools_need_change & row_number() == 1,
        0,
        treat
      )
    ) %>%
    ungroup()

  # Randomly shuffle the 0,1 values within each school, see
  # https://stackoverflow.com/questions/67195016/randomly-sort-rows-within-group
  df_classes <- df_classes %>%
    group_by(school) %>%
    mutate(treat = sample(treat)) %>%
    ungroup()

  # Return the processed dataset.
  df_classes
}

# Function to randomise across schools
#---------------------------------------------------------------------------#
# Argument: df_classes, a data frame with one row per class and a column
#   `school`.
# Returns:  df_classes with an added 0/1 column `treat` that is constant
#   within a school; about half of the schools receive 1.
rand_schools <- function(df_classes) {

  # Get the list of unique schools.
  unique_schools <- unique(df_classes$school)
  nb_schools <- length(unique_schools)

  # Decide how many schools to treat. If not an even number, randomly decide
  # if one more or less is treated.
  nb_schools_treat <- floor(nb_schools / 2)
  if (nb_schools %% 2 != 0) {
    nb_schools_treat <- nb_schools_treat + sample(c(0, 1), size = 1)
  }

  # Randomly sample half of the unique schools.
  # Index-based sampling avoids sample()'s special case for a single numeric
  # value (a lone school with numeric ID k would be drawn from 1:k); the
  # random number stream is identical for all other inputs.
  picked <- sample.int(nb_schools, size = nb_schools_treat, replace = FALSE)
  treated_schools <- unique_schools[picked]

  # Create the indicator variable treat.
  df_classes <- df_classes %>%
    mutate(treat = ifelse(school %in% treated_schools, 1, 0))

  # Return the processed dataset.
  df_classes
}

#---------------------------------------------------------------------------#
# III. Randomise
#---------------------------------------------------------------------------#

# Note: the order of the calls below determines how the seeded random number
# stream is consumed, so reordering them changes the assignment.

## A) Kindergarten classes
#---------------------------------------------------------------------------#

# Get kindergarten classes. The label is spelled "Kindergarden" here and must
# match the values of the `type` column in the input file.
classes_kg <- classes[
  classes$type == "Kindergarden",
  c("district", "school", "class")
]

### In Lisbon, we can randomise within school.
classes_kg_1 <- rand_classes(classes_kg %>% filter(district == "Lisbon"))

# Verify overall balance.
table(classes_kg_1$treat)

# Check if both 0 and 1 exist in the 'treat' column in each school.
classes_kg_1 %>%
  group_by(school) %>%
  summarise(
    has_treat_0 = any(treat == 0),
    has_treat_1 = any(treat == 1)
  )

### For other districts, we randomise on school level.
classes_kg_2 <- rand_schools(classes_kg %>% filter(district != "Lisbon"))

## B) Elementary school classes
#---------------------------------------------------------------------------#
# We randomise within schools.

# Get classes.
classes_el <- classes[
  classes$type == "Elementary",
  c("district", "school", "class")
]

# Randomise within schools.
classes_el <- rand_classes(classes_el)

# Verify overall balance.
table(classes_el$treat)

# Check if both 0 and 1 exist in the 'treat' column in each school.
classes_el %>%
  group_by(school) %>%
  summarise(
    has_treat_0 = any(treat == 0),
    has_treat_1 = any(treat == 1)
  )

#---------------------------------------------------------------------------#
# IV. Export results
#---------------------------------------------------------------------------#

# Append kindergarten and elementary school.
combined_classes <- bind_rows(classes_kg_1, classes_kg_2, classes_el) %>%
  arrange(district, school, treat)

# Verify overall balance.
table(combined_classes$treat)

# Check if both 0 and 1 exist in the 'treat' column in each school with
# kindergarten exception.
combined_classes %>%
  group_by(school) %>%
  summarise(
    has_treat_0 = any(treat == 0),
    has_treat_1 = any(treat == 1)
  )

# Export as csv. write.csv() is called with its default row.names = TRUE, so
# the file starts with an unnamed row-index column.
write.csv(combined_classes, paste0(RAND_PATH, "randomisation_classes.csv"))

#---------------------------------------------------------------------------#
# End of file
#---------------------------------------------------------------------------#