Skip to content

Commit cf4c7b7

Browse files
authored
Merge pull request #1648 from cmu-delphi/ndefries/final-qsf-fixes
Final changelog/codebook changes
2 parents 08d2c2f + dd5e300 commit cf4c7b7

9 files changed

+143
-152
lines changed

facebook/qsf-tools/append-old-changelog-umd.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ add_rationales_from_old_changelog <- function(path_to_changelog, path_to_old_cha
2626
new_version = col_double(),
2727
old_version = col_double()
2828
)) %>%
29-
select(new_version, old_version, variable_name, change_type,eu_version, notes)
29+
select(new_version, old_version, variable_name, change_type, eu_noneu, notes)
3030
changelog <- changelog %>%
3131
select(-notes) %>%
32-
left_join(old_changelog, by=c("new_version", "old_version", "variable_name", "change_type","eu_version"))
32+
left_join(old_changelog, by=c("new_version", "old_version", "variable_name", "change_type", "eu_noneu"))
3333

3434

3535
write_excel_csv(changelog, path_to_changelog, quote="needed")

facebook/qsf-tools/generate-changelog.R

Lines changed: 59 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
suppressPackageStartupMessages({
1313
library(tidyverse)
14+
library(jsonlite)
15+
library(stringr)
1416
})
1517

1618
# "old" = new
@@ -46,7 +48,7 @@ WAVE_COMPARE_MAP <- list(
4648

4749
DIFF_COLS <- c(
4850
"question",
49-
"matrix_subquestion",
51+
"subquestion",
5052
"response_options",
5153
"display_logic",
5254
"response_option_randomization",
@@ -59,9 +61,9 @@ CHANGE_TYPE_MAP <- c(
5961
question = "Question wording changed",
6062
display_logic = "Display logic changed",
6163
response_options = "Answer choices changed",
62-
matrix_subquestion = "Matrix subquestion text changed",
64+
subquestion = "Matrix subquestion text changed",
6365
response_option_randomization = "Answer choice order changed",
64-
respondent_group = "Respondent group changed"
66+
respondent_group = "Display logic changed"
6567
)
6668

6769

@@ -151,33 +153,30 @@ generate_changelog <- function(path_to_codebook,
151153
select(-x_exists, -y_exists)
152154

153155
combos <- added_items %>%
154-
filter(question_type == "Matrix" | !is.na(new_matrix_base_name) | !is.na(new_matrix_subquestion)) %>%
155-
distinct(old_version, new_matrix_base_name)
156+
filter(question_type == "Matrix" | !is.na(new_originating_question) | !is.na(new_subquestion)) %>%
157+
distinct(old_version, new_originating_question)
156158

157159
for (i in seq_len(nrow(combos))) {
158160
wave = combos[i,] %>% pull(old_version)
159-
base_name = combos[i,] %>% pull(new_matrix_base_name)
161+
base_name = combos[i,] %>% pull(new_originating_question)
160162
tmp <- added_items %>%
161163
filter(
162-
old_version == wave, new_matrix_base_name == base_name
164+
old_version == wave, new_originating_question == base_name
163165
)
164166
added_items <- anti_join(added_items, tmp)
165-
if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) {
167+
if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) {
166168
# Dedup subqs so only report base question once
167169
tmp <- tmp %>%
168-
group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>%
170+
group_by(old_originating_question, new_originating_question, new_version, old_version) %>%
169171
mutate(
170-
variable_name = new_matrix_base_name,
171-
old_matrix_subquestion = NA,
172-
new_matrix_subquestion = "Differ by subquestion",
173-
old_response_options = case_when(
174-
length(unique(old_response_options)) == 1 ~ old_response_options,
175-
TRUE ~ "Differ by subquestion"
176-
),
172+
old_subquestion = NA,
173+
new_subquestion = collapse_subq_elements(variable_name, new_subquestion, base_name),
174+
old_response_options = NA,
177175
new_response_options = case_when(
178176
length(unique(new_response_options)) == 1 ~ new_response_options,
179-
TRUE ~ "Differ by subquestion"
180-
)
177+
TRUE ~ rep(collapse_subq_elements(variable_name, new_response_options, base_name), length(new_response_options))
178+
),
179+
variable_name = new_originating_question
181180
) %>%
182181
slice_head() %>%
183182
ungroup()
@@ -205,33 +204,30 @@ generate_changelog <- function(path_to_codebook,
205204
select(-x_exists, -y_exists)
206205

207206
combos <- removed_items %>%
208-
filter(question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) %>%
209-
distinct(new_version, old_matrix_base_name)
207+
filter(question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) %>%
208+
distinct(new_version, old_originating_question)
210209

211210
for (i in seq_len(nrow(combos))) {
212211
wave = combos[i,] %>% pull(new_version)
213-
base_name = combos[i,] %>% pull(old_matrix_base_name)
212+
base_name = combos[i,] %>% pull(old_originating_question)
214213
tmp <- removed_items %>%
215214
filter(
216-
new_version == wave, old_matrix_base_name == base_name
215+
new_version == wave, old_originating_question == base_name
217216
)
218217
removed_items <- anti_join(removed_items, tmp)
219-
if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) {
218+
if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) {
220219
# Dedup subqs so only report base question once
221220
tmp <- tmp %>%
222-
group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>%
221+
group_by(old_originating_question, new_originating_question, new_version, old_version) %>%
223222
mutate(
224-
variable_name = old_matrix_base_name,
225-
old_matrix_subquestion = "Differ by subquestion",
226-
new_matrix_subquestion = NA,
223+
old_subquestion = collapse_subq_elements(variable_name, old_subquestion, base_name),
224+
new_subquestion = NA,
227225
old_response_options = case_when(
228226
length(unique(old_response_options)) == 1 ~ old_response_options,
229-
TRUE ~ "Differ by subquestion"
227+
TRUE ~ rep(collapse_subq_elements(variable_name, old_response_options, base_name), length(old_response_options))
230228
),
231-
new_response_options = case_when(
232-
length(unique(new_response_options)) == 1 ~ new_response_options,
233-
TRUE ~ "Differ by subquestion"
234-
)
229+
new_response_options = NA,
230+
variable_name = old_originating_question
235231
) %>%
236232
slice_head() %>%
237233
ungroup()
@@ -270,11 +266,11 @@ generate_changelog <- function(path_to_codebook,
270266

271267
## Don't report all matrix subquestions when the change is shared between all
272268
## of them, just report the base item.
273-
# Group by matrix_base_name, change_type, and wave, as long as the change_type is relevant and matrix_base_name is not NA.
269+
# Group by originating_question, change_type, and wave, as long as the change_type is relevant and originating_question is not NA.
274270
# Keep only one obs for each group.
275-
# Set var name in kept obs to matrix_base_name for generality and to be able to join rationales on.
271+
# Set var name in kept obs to originating_question for generality and to be able to join rationales on.
276272
combos <- changelog %>%
277-
filter((question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) &
273+
filter((question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) &
278274
change_type %in% c(
279275
"Question wording changed",
280276
"Display logic changed",
@@ -283,7 +279,7 @@ generate_changelog <- function(path_to_codebook,
283279
"Respondent group changed"
284280
)
285281
) %>%
286-
distinct(new_version, old_version, new_matrix_base_name, old_matrix_base_name, change_type)
282+
distinct(new_version, old_version, new_originating_question, old_originating_question, change_type)
287283

288284
SPECIAL_HANDLING <- list(
289285
"Answer choices changed" = list("new_response_options", "old_response_options"),
@@ -292,16 +288,16 @@ generate_changelog <- function(path_to_codebook,
292288
for (i in seq_len(nrow(combos))) {
293289
new_v <- combos[i,] %>% pull(new_version)
294290
old_v <- combos[i,] %>% pull(old_version)
295-
new_base <- combos[i,] %>% pull(new_matrix_base_name)
296-
old_base <- combos[i,] %>% pull(old_matrix_base_name)
291+
new_base <- combos[i,] %>% pull(new_originating_question)
292+
old_base <- combos[i,] %>% pull(old_originating_question)
297293
change <- combos[i,] %>% pull(change_type)
298294

299295
tmp <- changelog %>%
300296
filter(
301297
new_version == new_v,
302298
old_version == old_v,
303-
new_matrix_base_name == new_base,
304-
old_matrix_base_name == old_base,
299+
new_originating_question == new_base,
300+
old_originating_question == old_base,
305301
change_type == change
306302
)
307303
changelog <- anti_join(changelog, tmp)
@@ -316,8 +312,8 @@ generate_changelog <- function(path_to_codebook,
316312
length(unique(tmp[[new_col]])) == 1 &&
317313
length(unique(tmp[[old_col]])) == 1 &&
318314
(
319-
nrow(tmp) == codebook_raw %>% filter(version == old_v, matrix_base_name == old_base) %>% nrow() ||
320-
nrow(tmp) == codebook_raw %>% filter(version == new_v, matrix_base_name == new_base) %>% nrow()
315+
nrow(tmp) == codebook_raw %>% filter(version == old_v, originating_question == old_base) %>% nrow() ||
316+
nrow(tmp) == codebook_raw %>% filter(version == new_v, originating_question == new_base) %>% nrow()
321317
)
322318
) {
323319
combine_flag <- TRUE
@@ -331,11 +327,11 @@ generate_changelog <- function(path_to_codebook,
331327
slice_head() %>%
332328
mutate(
333329
variable_name = case_when(
334-
old_matrix_base_name != new_matrix_base_name ~ paste(old_matrix_base_name, new_matrix_base_name, sep="/"),
335-
TRUE ~ old_matrix_base_name
330+
old_originating_question != new_originating_question ~ paste(old_originating_question, new_originating_question, sep="/"),
331+
TRUE ~ old_originating_question
336332
),
337-
old_matrix_subquestion = NA,
338-
new_matrix_subquestion = NA
333+
old_subquestion = NA,
334+
new_subquestion = NA
339335
)
340336
}
341337

@@ -365,25 +361,25 @@ generate_changelog <- function(path_to_codebook,
365361
rename(
366362
new_question_text = new_question,
367363
old_question_text = old_question,
368-
new_matrix_subquestion_text = new_matrix_subquestion,
369-
old_matrix_subquestion_text = old_matrix_subquestion
364+
new_subquestion_text = new_subquestion,
365+
old_subquestion_text = old_subquestion
370366
) %>%
371367
select(
372368
new_version,
373369
old_version,
374370
variable_name,
375371
description,
376372
change_type,
377-
new_matrix_base_name,
373+
new_originating_question,
378374
new_question_text,
379-
new_matrix_subquestion_text,
375+
new_subquestion_text,
380376
new_response_options,
381377
new_display_logic,
382378
new_response_option_randomization,
383379
new_respondent_group,
384-
old_matrix_base_name,
380+
old_originating_question,
385381
old_question_text,
386-
old_matrix_subquestion_text,
382+
old_subquestion_text,
387383
old_response_options,
388384
old_display_logic,
389385
old_response_option_randomization,
@@ -396,7 +392,7 @@ generate_changelog <- function(path_to_codebook,
396392
}
397393

398394
rename_col <- function(col, prefix) {
399-
if (col %in% c(DIFF_COLS, "matrix_base_name")) {
395+
if (col %in% c(DIFF_COLS, "originating_question")) {
400396
paste(prefix, col, sep = "_")
401397
} else {
402398
col
@@ -411,6 +407,16 @@ get_old_version <- function(new_version, compare_map) {
411407
ifelse(new_version %in% compare_map, compare_map[compare_map == new_version] %>% names(), NA_character_)
412408
}
413409

410+
collapse_subq_elements <- function(variable_name, matrix_field, base_name) {
411+
subq_codes <- str_replace(variable_name, paste0(base_name, "_"), "") %>%
412+
strsplit("_") %>%
413+
# Get the first underscore-delimited chunk. Handles the C10 case, where
414+
# matrix subqs are called C10_<code>_1.
415+
purrr::map(~ .x[1])
416+
matrix_field <- as.list(matrix_field)
417+
names(matrix_field) <- subq_codes
418+
toJSON(matrix_field, auto_unbox = TRUE)
419+
}
414420

415421
args <- commandArgs(TRUE)
416422

0 commit comments

Comments
 (0)