-
Notifications
You must be signed in to change notification settings - Fork 16
Final changelog/codebook changes #1648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
91b7fc6
16e78f6
9bfaf08
5d704ce
336be6f
2bc9462
aeb22f9
d0735a2
40cb527
1367689
b39af55
d8254bf
f0d32cd
f466131
351b348
2a2e80e
5535a53
dd5e300
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ | |
|
||
suppressPackageStartupMessages({ | ||
library(tidyverse) | ||
library(jsonlite) | ||
library(stringr) | ||
}) | ||
|
||
# "old" = new | ||
|
@@ -46,7 +48,7 @@ WAVE_COMPARE_MAP <- list( | |
|
||
DIFF_COLS <- c( | ||
"question", | ||
"matrix_subquestion", | ||
"subquestion", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Our microdata documentation currently describes the column as
What should that say for the updated column? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CMU documentation still says Wichada has reviewed the codebooks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure I understand your first sentence. Are you saying CMU microdata files will still contain a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CMU codebook files still currently contain a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we don't plan to regenerate the CMU codebooks again, that's fine, we can leave the documentation as-is. I was just confused because I thought this meant the column name was changing in our files. You may want to make sure Zheng changes the UMD microdata documentation, since a lot of the parts were copied from our documentation. |
||
"response_options", | ||
"display_logic", | ||
"response_option_randomization", | ||
|
@@ -59,9 +61,9 @@ CHANGE_TYPE_MAP <- c( | |
question = "Question wording changed", | ||
display_logic = "Display logic changed", | ||
response_options = "Answer choices changed", | ||
matrix_subquestion = "Matrix subquestion text changed", | ||
subquestion = "Matrix subquestion text changed", | ||
response_option_randomization = "Answer choice order changed", | ||
respondent_group = "Respondent group changed" | ||
respondent_group = "Display logic changed" | ||
) | ||
|
||
|
||
|
@@ -151,33 +153,30 @@ generate_changelog <- function(path_to_codebook, | |
select(-x_exists, -y_exists) | ||
|
||
combos <- added_items %>% | ||
filter(question_type == "Matrix" | !is.na(new_matrix_base_name) | !is.na(new_matrix_subquestion)) %>% | ||
distinct(old_version, new_matrix_base_name) | ||
filter(question_type == "Matrix" | !is.na(new_originating_question) | !is.na(new_subquestion)) %>% | ||
distinct(old_version, new_originating_question) | ||
|
||
for (i in seq_len(nrow(combos))) { | ||
wave = combos[i,] %>% pull(old_version) | ||
base_name = combos[i,] %>% pull(new_matrix_base_name) | ||
base_name = combos[i,] %>% pull(new_originating_question) | ||
tmp <- added_items %>% | ||
filter( | ||
old_version == wave, new_matrix_base_name == base_name | ||
old_version == wave, new_originating_question == base_name | ||
) | ||
added_items <- anti_join(added_items, tmp) | ||
if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) { | ||
if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) { | ||
# Dedup subqs so only report base question once | ||
tmp <- tmp %>% | ||
group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>% | ||
group_by(old_originating_question, new_originating_question, new_version, old_version) %>% | ||
mutate( | ||
variable_name = new_matrix_base_name, | ||
old_matrix_subquestion = NA, | ||
new_matrix_subquestion = "Differ by subquestion", | ||
old_response_options = case_when( | ||
length(unique(old_response_options)) == 1 ~ old_response_options, | ||
TRUE ~ "Differ by subquestion" | ||
), | ||
old_subquestion = NA, | ||
new_subquestion = collapse_subq_elements(variable_name, new_subquestion, base_name), | ||
old_response_options = NA, | ||
new_response_options = case_when( | ||
length(unique(new_response_options)) == 1 ~ new_response_options, | ||
TRUE ~ "Differ by subquestion" | ||
) | ||
TRUE ~ rep(collapse_subq_elements(variable_name, new_response_options, base_name), length(new_response_options)) | ||
), | ||
variable_name = new_originating_question | ||
) %>% | ||
slice_head() %>% | ||
ungroup() | ||
|
@@ -205,33 +204,30 @@ generate_changelog <- function(path_to_codebook, | |
select(-x_exists, -y_exists) | ||
|
||
combos <- removed_items %>% | ||
filter(question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) %>% | ||
distinct(new_version, old_matrix_base_name) | ||
filter(question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) %>% | ||
distinct(new_version, old_originating_question) | ||
|
||
for (i in seq_len(nrow(combos))) { | ||
wave = combos[i,] %>% pull(new_version) | ||
base_name = combos[i,] %>% pull(old_matrix_base_name) | ||
base_name = combos[i,] %>% pull(old_originating_question) | ||
tmp <- removed_items %>% | ||
filter( | ||
new_version == wave, old_matrix_base_name == base_name | ||
new_version == wave, old_originating_question == base_name | ||
) | ||
removed_items <- anti_join(removed_items, tmp) | ||
if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) { | ||
if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) { | ||
# Dedup subqs so only report base question once | ||
tmp <- tmp %>% | ||
group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>% | ||
group_by(old_originating_question, new_originating_question, new_version, old_version) %>% | ||
mutate( | ||
variable_name = old_matrix_base_name, | ||
old_matrix_subquestion = "Differ by subquestion", | ||
new_matrix_subquestion = NA, | ||
old_subquestion = collapse_subq_elements(variable_name, old_subquestion, base_name), | ||
new_subquestion = NA, | ||
old_response_options = case_when( | ||
length(unique(old_response_options)) == 1 ~ old_response_options, | ||
TRUE ~ "Differ by subquestion" | ||
TRUE ~ rep(collapse_subq_elements(variable_name, old_response_options, base_name), length(old_response_options)) | ||
), | ||
new_response_options = case_when( | ||
length(unique(new_response_options)) == 1 ~ new_response_options, | ||
TRUE ~ "Differ by subquestion" | ||
) | ||
new_response_options = NA, | ||
variable_name = old_originating_question | ||
) %>% | ||
slice_head() %>% | ||
ungroup() | ||
|
@@ -270,11 +266,11 @@ generate_changelog <- function(path_to_codebook, | |
|
||
## Don't report all matrix subquestions when the change is shared between all | ||
## of them, just report the base item. | ||
# Group by matrix_base_name, change_type, and wave, as long as the change_type is relevant and matrix_base_name is not NA. | ||
# Group by originating_question, change_type, and wave, as long as the change_type is relevant and originating_question is not NA. | ||
# Keep only one obs for each group. | ||
# Set var name in kept obs to matrix_base_name for generality and to be able to join rationales on. | ||
# Set var name in kept obs to originating_question for generality and to be able to join rationales on. | ||
combos <- changelog %>% | ||
filter((question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) & | ||
filter((question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) & | ||
change_type %in% c( | ||
"Question wording changed", | ||
"Display logic changed", | ||
|
@@ -283,7 +279,7 @@ generate_changelog <- function(path_to_codebook, | |
"Respondent group changed" | ||
) | ||
) %>% | ||
distinct(new_version, old_version, new_matrix_base_name, old_matrix_base_name, change_type) | ||
distinct(new_version, old_version, new_originating_question, old_originating_question, change_type) | ||
|
||
SPECIAL_HANDLING <- list( | ||
"Answer choices changed" = list("new_response_options", "old_response_options"), | ||
|
@@ -292,16 +288,16 @@ generate_changelog <- function(path_to_codebook, | |
for (i in seq_len(nrow(combos))) { | ||
new_v <- combos[i,] %>% pull(new_version) | ||
old_v <- combos[i,] %>% pull(old_version) | ||
new_base <- combos[i,] %>% pull(new_matrix_base_name) | ||
old_base <- combos[i,] %>% pull(old_matrix_base_name) | ||
new_base <- combos[i,] %>% pull(new_originating_question) | ||
old_base <- combos[i,] %>% pull(old_originating_question) | ||
change <- combos[i,] %>% pull(change_type) | ||
|
||
tmp <- changelog %>% | ||
filter( | ||
new_version == new_v, | ||
old_version == old_v, | ||
new_matrix_base_name == new_base, | ||
old_matrix_base_name == old_base, | ||
new_originating_question == new_base, | ||
old_originating_question == old_base, | ||
change_type == change | ||
) | ||
changelog <- anti_join(changelog, tmp) | ||
|
@@ -316,8 +312,8 @@ generate_changelog <- function(path_to_codebook, | |
length(unique(tmp[[new_col]])) == 1 && | ||
length(unique(tmp[[old_col]])) == 1 && | ||
( | ||
nrow(tmp) == codebook_raw %>% filter(version == old_v, matrix_base_name == old_base) %>% nrow() || | ||
nrow(tmp) == codebook_raw %>% filter(version == new_v, matrix_base_name == new_base) %>% nrow() | ||
nrow(tmp) == codebook_raw %>% filter(version == old_v, originating_question == old_base) %>% nrow() || | ||
nrow(tmp) == codebook_raw %>% filter(version == new_v, originating_question == new_base) %>% nrow() | ||
) | ||
) { | ||
combine_flag <- TRUE | ||
|
@@ -331,11 +327,11 @@ generate_changelog <- function(path_to_codebook, | |
slice_head() %>% | ||
mutate( | ||
variable_name = case_when( | ||
old_matrix_base_name != new_matrix_base_name ~ paste(old_matrix_base_name, new_matrix_base_name, sep="/"), | ||
TRUE ~ old_matrix_base_name | ||
old_originating_question != new_originating_question ~ paste(old_originating_question, new_originating_question, sep="/"), | ||
TRUE ~ old_originating_question | ||
), | ||
old_matrix_subquestion = NA, | ||
new_matrix_subquestion = NA | ||
old_subquestion = NA, | ||
new_subquestion = NA | ||
) | ||
} | ||
|
||
|
@@ -365,25 +361,25 @@ generate_changelog <- function(path_to_codebook, | |
rename( | ||
new_question_text = new_question, | ||
old_question_text = old_question, | ||
new_matrix_subquestion_text = new_matrix_subquestion, | ||
old_matrix_subquestion_text = old_matrix_subquestion | ||
new_subquestion_text = new_subquestion, | ||
old_subquestion_text = old_subquestion | ||
) %>% | ||
select( | ||
new_version, | ||
old_version, | ||
variable_name, | ||
description, | ||
change_type, | ||
new_matrix_base_name, | ||
new_originating_question, | ||
new_question_text, | ||
new_matrix_subquestion_text, | ||
new_subquestion_text, | ||
new_response_options, | ||
new_display_logic, | ||
new_response_option_randomization, | ||
new_respondent_group, | ||
old_matrix_base_name, | ||
old_originating_question, | ||
old_question_text, | ||
old_matrix_subquestion_text, | ||
old_subquestion_text, | ||
old_response_options, | ||
old_display_logic, | ||
old_response_option_randomization, | ||
|
@@ -396,7 +392,7 @@ generate_changelog <- function(path_to_codebook, | |
} | ||
|
||
rename_col <- function(col, prefix) { | ||
if (col %in% c(DIFF_COLS, "matrix_base_name")) { | ||
if (col %in% c(DIFF_COLS, "originating_question")) { | ||
paste(prefix, col, sep = "_") | ||
} else { | ||
col | ||
|
@@ -411,6 +407,16 @@ get_old_version <- function(new_version, compare_map) { | |
ifelse(new_version %in% compare_map, compare_map[compare_map == new_version] %>% names(), NA_character_) | ||
} | ||
|
||
#' Collapse per-subquestion values of one item into a single JSON object
#'
#' Each value in `matrix_field` is keyed by a subquestion code derived from
#' the matching entry of `variable_name`: the leading "<base_name>_" prefix is
#' removed and the first underscore-delimited chunk of what remains is used.
#'
#' @param variable_name Character vector of subquestion variable names.
#' @param matrix_field Vector of values, one per entry of `variable_name`.
#' @param base_name Single string; the name the subquestions are prefixed by.
#' @return The result of `jsonlite::toJSON()` on the named list of values.
collapse_subq_elements <- function(variable_name, matrix_field, base_name) {
  prefix <- paste0(base_name, "_")
  remainder <- as.character(variable_name)

  # Strip the prefix as a literal string and only from the start of the name.
  # Treating `base_name` as an unanchored regex would let metacharacters such
  # as "." match unintended text, or remove the prefix from mid-name.
  prefixed <- which(startsWith(remainder, prefix))
  remainder[prefixed] <- substring(remainder[prefixed], nchar(prefix) + 1L)

  # Get the first underscore-delimited chunk. Handles the C10 case, where
  # matrix subqs are called C10_<code>_1.
  subq_codes <- vapply(
    strsplit(remainder, "_", fixed = TRUE),
    function(chunks) chunks[1],
    character(1)
  )

  matrix_field <- as.list(matrix_field)
  names(matrix_field) <- subq_codes
  jsonlite::toJSON(matrix_field, auto_unbox = TRUE)
}
|
||
args <- commandArgs(TRUE) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.