11
11
12
12
suppressPackageStartupMessages({
13
13
library(tidyverse )
14
+ library(jsonlite )
15
+ library(stringr )
14
16
})
15
17
16
18
# "old" = new
@@ -46,7 +48,7 @@ WAVE_COMPARE_MAP <- list(
46
48
47
49
DIFF_COLS <- c(
48
50
" question" ,
49
- " matrix_subquestion " ,
51
+ " subquestion " ,
50
52
" response_options" ,
51
53
" display_logic" ,
52
54
" response_option_randomization" ,
@@ -59,9 +61,9 @@ CHANGE_TYPE_MAP <- c(
59
61
question = " Question wording changed" ,
60
62
display_logic = " Display logic changed" ,
61
63
response_options = " Answer choices changed" ,
62
- matrix_subquestion = " Matrix subquestion text changed" ,
64
+ subquestion = " Matrix subquestion text changed" ,
63
65
response_option_randomization = " Answer choice order changed" ,
64
- respondent_group = " Respondent group changed"
66
+ respondent_group = " Display logic changed"
65
67
)
66
68
67
69
@@ -151,33 +153,30 @@ generate_changelog <- function(path_to_codebook,
151
153
select(- x_exists , - y_exists )
152
154
153
155
combos <- added_items %> %
154
- filter(question_type == " Matrix" | ! is.na(new_matrix_base_name ) | ! is.na(new_matrix_subquestion )) %> %
155
- distinct(old_version , new_matrix_base_name )
156
+ filter(question_type == " Matrix" | ! is.na(new_originating_question ) | ! is.na(new_subquestion )) %> %
157
+ distinct(old_version , new_originating_question )
156
158
157
159
for (i in seq_len(nrow(combos ))) {
158
160
wave = combos [i ,] %> % pull(old_version )
159
- base_name = combos [i ,] %> % pull(new_matrix_base_name )
161
+ base_name = combos [i ,] %> % pull(new_originating_question )
160
162
tmp <- added_items %> %
161
163
filter(
162
- old_version == wave , new_matrix_base_name == base_name
164
+ old_version == wave , new_originating_question == base_name
163
165
)
164
166
added_items <- anti_join(added_items , tmp )
165
- if (nrow(filter(codebook_raw , version == wave , matrix_base_name == base_name )) == 0 ) {
167
+ if (nrow(filter(codebook_raw , version == wave , originating_question == base_name )) == 0 ) {
166
168
# Dedup subqs so only report base question once
167
169
tmp <- tmp %> %
168
- group_by(old_matrix_base_name , new_matrix_base_name , new_version , old_version ) %> %
170
+ group_by(old_originating_question , new_originating_question , new_version , old_version ) %> %
169
171
mutate(
170
- variable_name = new_matrix_base_name ,
171
- old_matrix_subquestion = NA ,
172
- new_matrix_subquestion = " Differ by subquestion" ,
173
- old_response_options = case_when(
174
- length(unique(old_response_options )) == 1 ~ old_response_options ,
175
- TRUE ~ " Differ by subquestion"
176
- ),
172
+ old_subquestion = NA ,
173
+ new_subquestion = collapse_subq_elements(variable_name , new_subquestion , base_name ),
174
+ old_response_options = NA ,
177
175
new_response_options = case_when(
178
176
length(unique(new_response_options )) == 1 ~ new_response_options ,
179
- TRUE ~ " Differ by subquestion"
180
- )
177
+ TRUE ~ rep(collapse_subq_elements(variable_name , new_response_options , base_name ), length(new_response_options ))
178
+ ),
179
+ variable_name = new_originating_question
181
180
) %> %
182
181
slice_head() %> %
183
182
ungroup()
@@ -205,33 +204,30 @@ generate_changelog <- function(path_to_codebook,
205
204
select(- x_exists , - y_exists )
206
205
207
206
combos <- removed_items %> %
208
- filter(question_type == " Matrix" | ! is.na(old_matrix_base_name ) | ! is.na(old_matrix_subquestion )) %> %
209
- distinct(new_version , old_matrix_base_name )
207
+ filter(question_type == " Matrix" | ! is.na(old_originating_question ) | ! is.na(old_subquestion )) %> %
208
+ distinct(new_version , old_originating_question )
210
209
211
210
for (i in seq_len(nrow(combos ))) {
212
211
wave = combos [i ,] %> % pull(new_version )
213
- base_name = combos [i ,] %> % pull(old_matrix_base_name )
212
+ base_name = combos [i ,] %> % pull(old_originating_question )
214
213
tmp <- removed_items %> %
215
214
filter(
216
- new_version == wave , old_matrix_base_name == base_name
215
+ new_version == wave , old_originating_question == base_name
217
216
)
218
217
removed_items <- anti_join(removed_items , tmp )
219
- if (nrow(filter(codebook_raw , version == wave , matrix_base_name == base_name )) == 0 ) {
218
+ if (nrow(filter(codebook_raw , version == wave , originating_question == base_name )) == 0 ) {
220
219
# Dedup subqs so only report base question once
221
220
tmp <- tmp %> %
222
- group_by(old_matrix_base_name , new_matrix_base_name , new_version , old_version ) %> %
221
+ group_by(old_originating_question , new_originating_question , new_version , old_version ) %> %
223
222
mutate(
224
- variable_name = old_matrix_base_name ,
225
- old_matrix_subquestion = " Differ by subquestion" ,
226
- new_matrix_subquestion = NA ,
223
+ old_subquestion = collapse_subq_elements(variable_name , old_subquestion , base_name ),
224
+ new_subquestion = NA ,
227
225
old_response_options = case_when(
228
226
length(unique(old_response_options )) == 1 ~ old_response_options ,
229
- TRUE ~ " Differ by subquestion "
227
+ TRUE ~ rep(collapse_subq_elements( variable_name , old_response_options , base_name ), length( old_response_options ))
230
228
),
231
- new_response_options = case_when(
232
- length(unique(new_response_options )) == 1 ~ new_response_options ,
233
- TRUE ~ " Differ by subquestion"
234
- )
229
+ new_response_options = NA ,
230
+ variable_name = old_originating_question
235
231
) %> %
236
232
slice_head() %> %
237
233
ungroup()
@@ -270,11 +266,11 @@ generate_changelog <- function(path_to_codebook,
270
266
271
267
# # Don't report all matrix subquestions when the change is shared between all
272
268
# # of them, just report the base item.
273
- # Group by matrix_base_name , change_type, and wave, as long as the change_type is relevant and matrix_base_name is not NA.
269
+ # Group by originating_question , change_type, and wave, as long as the change_type is relevant and originating_question is not NA.
274
270
# Keep only one obs for each group.
275
- # Set var name in kept obs to matrix_base_name for generality and to be able to join rationales on.
271
+ # Set var name in kept obs to originating_question for generality and to be able to join rationales on.
276
272
combos <- changelog %> %
277
- filter((question_type == " Matrix" | ! is.na(old_matrix_base_name ) | ! is.na(old_matrix_subquestion )) &
273
+ filter((question_type == " Matrix" | ! is.na(old_originating_question ) | ! is.na(old_subquestion )) &
278
274
change_type %in% c(
279
275
" Question wording changed" ,
280
276
" Display logic changed" ,
@@ -283,7 +279,7 @@ generate_changelog <- function(path_to_codebook,
283
279
" Respondent group changed"
284
280
)
285
281
) %> %
286
- distinct(new_version , old_version , new_matrix_base_name , old_matrix_base_name , change_type )
282
+ distinct(new_version , old_version , new_originating_question , old_originating_question , change_type )
287
283
288
284
SPECIAL_HANDLING <- list (
289
285
" Answer choices changed" = list (" new_response_options" , " old_response_options" ),
@@ -292,16 +288,16 @@ generate_changelog <- function(path_to_codebook,
292
288
for (i in seq_len(nrow(combos ))) {
293
289
new_v <- combos [i ,] %> % pull(new_version )
294
290
old_v <- combos [i ,] %> % pull(old_version )
295
- new_base <- combos [i ,] %> % pull(new_matrix_base_name )
296
- old_base <- combos [i ,] %> % pull(old_matrix_base_name )
291
+ new_base <- combos [i ,] %> % pull(new_originating_question )
292
+ old_base <- combos [i ,] %> % pull(old_originating_question )
297
293
change <- combos [i ,] %> % pull(change_type )
298
294
299
295
tmp <- changelog %> %
300
296
filter(
301
297
new_version == new_v ,
302
298
old_version == old_v ,
303
- new_matrix_base_name == new_base ,
304
- old_matrix_base_name == old_base ,
299
+ new_originating_question == new_base ,
300
+ old_originating_question == old_base ,
305
301
change_type == change
306
302
)
307
303
changelog <- anti_join(changelog , tmp )
@@ -316,8 +312,8 @@ generate_changelog <- function(path_to_codebook,
316
312
length(unique(tmp [[new_col ]])) == 1 &&
317
313
length(unique(tmp [[old_col ]])) == 1 &&
318
314
(
319
- nrow(tmp ) == codebook_raw %> % filter(version == old_v , matrix_base_name == old_base ) %> % nrow() ||
320
- nrow(tmp ) == codebook_raw %> % filter(version == new_v , matrix_base_name == new_base ) %> % nrow()
315
+ nrow(tmp ) == codebook_raw %> % filter(version == old_v , originating_question == old_base ) %> % nrow() ||
316
+ nrow(tmp ) == codebook_raw %> % filter(version == new_v , originating_question == new_base ) %> % nrow()
321
317
)
322
318
) {
323
319
combine_flag <- TRUE
@@ -331,11 +327,11 @@ generate_changelog <- function(path_to_codebook,
331
327
slice_head() %> %
332
328
mutate(
333
329
variable_name = case_when(
334
- old_matrix_base_name != new_matrix_base_name ~ paste(old_matrix_base_name , new_matrix_base_name , sep = " /" ),
335
- TRUE ~ old_matrix_base_name
330
+ old_originating_question != new_originating_question ~ paste(old_originating_question , new_originating_question , sep = " /" ),
331
+ TRUE ~ old_originating_question
336
332
),
337
- old_matrix_subquestion = NA ,
338
- new_matrix_subquestion = NA
333
+ old_subquestion = NA ,
334
+ new_subquestion = NA
339
335
)
340
336
}
341
337
@@ -365,25 +361,25 @@ generate_changelog <- function(path_to_codebook,
365
361
rename(
366
362
new_question_text = new_question ,
367
363
old_question_text = old_question ,
368
- new_matrix_subquestion_text = new_matrix_subquestion ,
369
- old_matrix_subquestion_text = old_matrix_subquestion
364
+ new_subquestion_text = new_subquestion ,
365
+ old_subquestion_text = old_subquestion
370
366
) %> %
371
367
select(
372
368
new_version ,
373
369
old_version ,
374
370
variable_name ,
375
371
description ,
376
372
change_type ,
377
- new_matrix_base_name ,
373
+ new_originating_question ,
378
374
new_question_text ,
379
- new_matrix_subquestion_text ,
375
+ new_subquestion_text ,
380
376
new_response_options ,
381
377
new_display_logic ,
382
378
new_response_option_randomization ,
383
379
new_respondent_group ,
384
- old_matrix_base_name ,
380
+ old_originating_question ,
385
381
old_question_text ,
386
- old_matrix_subquestion_text ,
382
+ old_subquestion_text ,
387
383
old_response_options ,
388
384
old_display_logic ,
389
385
old_response_option_randomization ,
@@ -396,7 +392,7 @@ generate_changelog <- function(path_to_codebook,
396
392
}
397
393
398
394
rename_col <- function (col , prefix ) {
399
- if (col %in% c(DIFF_COLS , " matrix_base_name " )) {
395
+ if (col %in% c(DIFF_COLS , " originating_question " )) {
400
396
paste(prefix , col , sep = " _" )
401
397
} else {
402
398
col
@@ -411,6 +407,16 @@ get_old_version <- function(new_version, compare_map) {
411
407
ifelse(new_version %in% compare_map , compare_map [compare_map == new_version ] %> % names(), NA_character_ )
412
408
}
413
409
410
+ collapse_subq_elements <- function (variable_name , matrix_field , base_name ) {
411
+ subq_codes <- str_replace(variable_name , paste0(base_name , " _" ), " " ) %> %
412
+ strsplit(" _" ) %> %
413
+ # Get the first underscore-delimited chunk. Handles the C10 case, where
414
+ # matrix subqs are called C10_<code>_1.
415
+ purrr :: map(~ .x [1 ])
416
+ matrix_field <- as.list(matrix_field )
417
+ names(matrix_field ) <- subq_codes
418
+ toJSON(matrix_field , auto_unbox = TRUE )
419
+ }
414
420
415
421
args <- commandArgs(TRUE )
416
422
0 commit comments