Skip to content

update conversion tools and their tests #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 74 additions & 45 deletions reproschema/redcap2reproschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,43 @@


def clean_header(header):
return {k.lstrip("\ufeff"): v for k, v in header.items()}
cleaned_header = {}
for k, v in header.items():
# Strip BOM, whitespace, and enclosing quotation marks if present
cleaned_key = k.lstrip("\ufeff").strip().strip('"')
cleaned_header[cleaned_key] = v
return cleaned_header


def normalize_condition(condition_str):
# Regular expressions for various pattern replacements
re_parentheses = re.compile(r"\(([0-9]*)\)")
re_non_gt_lt_equal = re.compile(r"([^>|<])=")
re_brackets = re.compile(r"\[([^\]]*)\]")
re_extra_spaces = re.compile(r"\s+")
re_double_quotes = re.compile(r'"')
re_or = re.compile(r"\bor\b") # Match 'or' as whole word

# Apply regex replacements
condition_str = re_parentheses.sub(r"___\1", condition_str)
condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str)
condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ")
condition_str = re_brackets.sub(r" \1 ", condition_str)
return condition_str

# Replace 'or' with '||', ensuring not to replace '||'
condition_str = re_or.sub("||", condition_str)

# Replace 'and' with '&&'
condition_str = condition_str.replace(" and ", " && ")

# Trim extra spaces and replace double quotes with single quotes
condition_str = re_extra_spaces.sub(
" ", condition_str
).strip() # Reduce multiple spaces to a single space
condition_str = re_double_quotes.sub(
"'", condition_str
) # Replace double quotes with single quotes

return condition_str.strip()


def process_visibility(data):
Expand All @@ -42,7 +66,11 @@ def process_visibility(data):

def parse_field_type_and_value(field, input_type_map):
field_type = field.get("Field Type", "")
input_type = input_type_map.get(field_type, field_type)
# Check if field_type is 'yesno' and directly assign 'radio' as the input type
if field_type == "yesno":
input_type = "radio" # Directly set to 'radio' for 'yesno' fields
else:
input_type = input_type_map.get(field_type, field_type) # Original logic

# Initialize the default value type as string
value_type = "xsd:string"
Expand All @@ -55,7 +83,8 @@ def parse_field_type_and_value(field, input_type_map):
"time_": "xsd:time",
"email": "xsd:string",
"phone": "xsd:string",
} # todo: input_type="signature"
# No change needed here for 'yesno', as it's handled above
}

# Get the validation type from the field, if available
validation_type = field.get(
Expand Down Expand Up @@ -91,10 +120,11 @@ def process_choices(field_type, choices_str):
except ValueError:
value = parts[0]

choice_obj = {"name": parts[1], "value": value}
if len(parts) == 3:
# Handle image url
choice_obj["schema:image"] = f"{parts[2]}.png"
choice_obj = {"name": " ".join(parts[1:]), "value": value}
# remove image for now
# if len(parts) == 3:
# # Handle image url
# choice_obj["image"] = f"{parts[2]}.png"
choices.append(choice_obj)
return choices

Expand Down Expand Up @@ -156,7 +186,7 @@ def process_row(

rowData = {
"@context": schema_context_url,
"@type": "reproschema:Field",
"@type": "reproschema:Item",
"@id": item_id,
"prefLabel": item_id,
"description": f"{item_id} of {form_name}",
Expand All @@ -179,10 +209,7 @@ def process_row(
}

for key, value in field.items():
if (
schema_map.get(key) in ["question", "schema:description", "preamble"]
and value
):
if schema_map.get(key) in ["question", "description", "preamble"] and value:
rowData.update({schema_map[key]: parse_html(value)})

elif schema_map.get(key) == "allow" and value:
Expand Down Expand Up @@ -214,21 +241,15 @@ def process_row(
}
)

elif schema_map.get(key) == "visibility" and value:
condition = normalize_condition(value)
rowData.setdefault("visibility", []).append(
{"variableName": field["Variable / Field Name"], "isVis": condition}
)

elif key == "Identifier?" and value:
identifier_val = value.lower() == "y"
rowData.update(
{
schema_map[key]: [
{"legalStandard": "unknown", "isIdentifier": identifier_val}
]
}
)
# elif key == "Identifier?" and value:
# identifier_val = value.lower() == "y"
# rowData.update(
# {
# schema_map[key]: [
# {"legalStandard": "unknown", "isIdentifier": identifier_val}
# ]
# }
# )

elif key in additional_notes_list and value:
notes_obj = {"source": "redcap", "column": key, "value": value}
Expand All @@ -240,6 +261,7 @@ def process_row(
def create_form_schema(
abs_folder_path,
schema_context_url,
redcap_version,
form_name,
activity_display_name,
activity_description,
Expand All @@ -259,16 +281,17 @@ def create_form_schema(
"prefLabel": activity_display_name,
"description": activity_description,
"schemaVersion": "1.0.0-rc4",
"version": "0.0.1",
"version": redcap_version,
"ui": {
"order": unique_order,
"addProperties": bl_list,
"shuffle": False,
},
}

if matrix_list:
json_ld["matrixInfo"] = matrix_list
# remove matrixInfo to pass validation
# if matrix_list:
# json_ld["matrixInfo"] = matrix_list
if scores_list:
json_ld["scoringLogic"] = scores_list

Expand Down Expand Up @@ -296,6 +319,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order):
def create_protocol_schema(
abs_folder_path,
schema_context_url,
redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
Expand All @@ -307,31 +331,33 @@ def create_protocol_schema(
"@context": schema_context_url,
"@type": "reproschema:Protocol",
"@id": f"{protocol_name}_schema",
"skos:prefLabel": protocol_display_name,
"skos:altLabel": f"{protocol_name}_schema",
"schema:description": protocol_description,
"schema:schemaVersion": "1.0.0-rc4",
"schema:version": "0.0.1",
"prefLabel": protocol_display_name,
"altLabel": f"{protocol_name}_schema",
"description": protocol_description,
"schemaVersion": "1.0.0-rc4",
"version": redcap_version,
"ui": {
"addProperties": [],
"order": protocol_order,
"order": [],
"shuffle": False,
},
}

# Populate addProperties list
for activity in protocol_order:
full_path = f"../activities/{activity}/{activity}_schema"
add_property = {
"isAbout": f"../activities/{activity}/{activity}_schema",
"isAbout": full_path,
"variableName": f"{activity}_schema",
# Assuming activity name as prefLabel, update as needed
"prefLabel": activity.replace("_", " ").title(),
"isVis": protocol_visibility_obj.get(
activity, True
), # Default to True if not specified
}
protocol_schema["ui"]["addProperties"].append(add_property)

# Add visibility if needed
if protocol_visibility_obj:
protocol_schema["ui"]["visibility"] = protocol_visibility_obj
# Add the full path to the order list
protocol_schema["ui"]["order"].append(full_path)

protocol_dir = f"{abs_folder_path}/{protocol_name}"
schema_file = f"{protocol_name}_schema"
Expand Down Expand Up @@ -420,6 +446,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
protocol_name = protocol.get("protocol_name")
protocol_display_name = protocol.get("protocol_display_name")
protocol_description = protocol.get("protocol_description")
redcap_version = protocol.get("redcap_version")

if not protocol_name:
raise ValueError("Protocol name not specified in the YAML file.")
Expand All @@ -434,7 +461,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
abs_folder_path = os.path.abspath(protocol_name)

if schema_context_url is None:
schema_context_url = "https://github.com/raw/ReproNim/reproschema/1.0.0-rc4/contexts/generic"
schema_context_url = "https://github.com/raw/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new"

# Initialize variables
schema_map = {
Expand All @@ -451,7 +478,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
"Choices, Calculations, OR Slider Labels": "choices", # column F
"Branching Logic (Show field only if...)": "visibility", # column L
"Custom Alignment": "customAlignment", # column N
"Identifier?": "identifiable", # column K
# "Identifier?": "identifiable", # column K
"multipleChoice": "multipleChoice",
"responseType": "@type",
}
Expand Down Expand Up @@ -515,6 +542,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_form_schema(
abs_folder_path,
schema_context_url,
redcap_version,
form_name,
activity_display_name,
activity_description,
Expand All @@ -530,6 +558,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_protocol_schema(
abs_folder_path,
schema_context_url,
redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
Expand Down
11 changes: 9 additions & 2 deletions reproschema/reproschema2redcap.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def get_csv_data(dir_path):
if protocol_dir.is_dir():
# Check for a _schema file in each directory
schema_file = next(protocol_dir.glob("*_schema"), None)
print(f"Found schema file: {schema_file}")
if schema_file:
# Process the found _schema file
parsed_protocol_json = read_json_file(schema_file)
Expand All @@ -152,8 +153,14 @@ def get_csv_data(dir_path):
normalized_relative_path = Path(
relative_activity_path.lstrip("../")
)
activity_path = dir_path / normalized_relative_path
print(f"Processing activity {activity_path}")

activity_path = (
dir_path
/ "activities"
/ normalized_relative_path
/ (normalized_relative_path.name + "_schema")
)

parsed_activity_json = read_json_file(activity_path)

if parsed_activity_json:
Expand Down
39 changes: 28 additions & 11 deletions reproschema/tests/test_redcap2reproschema.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import shutil
import pytest
import yaml
from click.testing import CliRunner
from ..cli import main # Import the Click group
from ..cli import main

# Assuming your test files are located in a 'tests' directory
CSV_FILE_NAME = "redcap_dict.csv"
YAML_FILE_NAME = "redcap2rs.yaml"
CSV_TEST_FILE = os.path.join(
Expand All @@ -15,17 +15,34 @@
)


def test_redcap2reproschema_success():
def test_redcap2reproschema(tmpdir):
runner = CliRunner()

with runner.isolated_filesystem():
# Copy the test files to the isolated filesystem
shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME)
shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME)
temp_csv_file = tmpdir.join(CSV_FILE_NAME)
temp_yaml_file = tmpdir.join(YAML_FILE_NAME)

shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string
shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string

# Change the current working directory to tmpdir
with tmpdir.as_cwd():
# Read YAML to find the expected output directory name
with open(str(temp_yaml_file), "r") as file: # Convert to string
protocol = yaml.safe_load(file)
protocol_name = protocol.get("protocol_name", "").replace(" ", "_")

# Run the command within the isolated filesystem
result = runner.invoke(
main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME]
main,
[
"redcap2reproschema",
str(temp_csv_file),
str(temp_yaml_file),
], # Convert to string
)
print(result.output)
assert result.exit_code == 0

assert (
result.exit_code == 0
), f"The command failed to execute successfully: {result.output}"
assert os.path.isdir(
protocol_name
), f"Expected output directory '{protocol_name}' does not exist"
9 changes: 2 additions & 7 deletions reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol"
# This name will be displayed in the application.
protocol_display_name: "redcap protocols"

# GitHub Repository Information:
# Create a GitHub repository named 'reproschema' to store your reproschema protocols.
# Replace 'your_github_username' with your actual GitHub username.
user_name: "yibeichan"
repo_name: "redcap2reproschema" # Recommended name; can be different if preferred.
repo_url: "https://github.com/{{user_name}}/{{repo_name}}"

# Protocol Description:
# Provide a brief description of your protocol.
protocol_description: "testing" # Example: "This protocol is for ..."

redcap_version: "3.0.0"
Loading