From 1d1b1e3a87669a7106f2acec3eaef667ab9c4b6f Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 1 Aug 2025 15:53:10 -0400 Subject: [PATCH 01/10] Drop ga4gh pinned version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2148ddafd..96f7d8662 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ annoy -ga4gh.vrs[extras]==0.8.4 +ga4gh.vrs[extras] hail hdbscan ipywidgets From c5c0653741182e0fc186d5c543e1eb0122b2bc09 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Mon, 4 Aug 2025 10:33:38 -0400 Subject: [PATCH 02/10] Add VRS tests --- gnomad/utils/annotations.py | 17 ++- requirements.txt | 2 +- tests/utils/test_annotations.py | 216 ++++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+), 4 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index 1971d8f8d..c0559e303 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -2672,9 +2672,20 @@ def add_gks_vrs( "state": {"type": "LiteralSequenceExpression", "sequence": vrs_state_sequence}, } - location_id = ga4gh_core._internal.identifiers.ga4gh_identify( - ga4gh_vrs.models.SequenceLocation(**vrs_dict_out["location"]) - ) + # Check VRS version and use appropriate API + if hasattr(ga4gh_core, "ga4gh_identify"): + # VRS 2.0.0+ API + seq_loc = ga4gh_vrs.models.SequenceLocation( + sequenceReference=vrs_chrom_id, + start=vrs_start_value, + end=vrs_end_value, + ) + location_id = ga4gh_core.ga4gh_identify(seq_loc) + else: + # VRS 0.8.4 API + location_id = ga4gh_core._internal.identifiers.ga4gh_identify( + ga4gh_vrs.models.SequenceLocation(**vrs_dict_out["location"]) + ) vrs_dict_out["location"]["_id"] = location_id diff --git a/requirements.txt b/requirements.txt index 96f7d8662..449d05a06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ annoy -ga4gh.vrs[extras] +ga4gh.vrs[extras]==2.0.0 hail hdbscan ipywidgets diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index a97234a92..5e99b1b02 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -6,6 +6,8 @@ import pytest from gnomad.utils.annotations import ( + add_gks_va, + add_gks_vrs, fill_missing_key_combinations, get_copy_state_by_sex, merge_array_expressions, @@ -1085,3 +1087,217 @@ def test_merge_histograms_sum_with_negatives_error(self, sample_ht): result_hist = merge_histograms([ht.hist1, ht.hist2], operation="sum") with pytest.raises(Exception): ht.select(result_hist=result_hist).collect() + + +class TestVRSFunctions: + """Test the VRS-related functions.""" + + def test_add_gks_vrs_import(self): + """Test that the VRS functions can be imported successfully.""" + from gnomad.utils.annotations import add_gks_va, add_gks_vrs + + assert callable(add_gks_vrs) + assert callable(add_gks_va) + + def test_vrs_chromosome_ids_available(self): + """Test that VRS chromosome IDs are available.""" + from gnomad.utils.annotations import VRS_CHROM_IDS + + # Check that VRS_CHROM_IDS contains expected builds + assert "GRCh38" in VRS_CHROM_IDS + assert "GRCh37" in VRS_CHROM_IDS + + # Check that chromosome mappings exist + grch38_chroms = VRS_CHROM_IDS["GRCh38"] + grch37_chroms = VRS_CHROM_IDS["GRCh37"] + + # Check some common chromosomes + assert "chr1" in grch38_chroms + assert "chrX" in grch38_chroms + assert "1" in grch37_chroms + assert "X" in grch37_chroms + + # Check that IDs are in the expected format + assert grch38_chroms["chr1"].startswith("ga4gh:SQ.") + assert grch37_chroms["1"].startswith("ga4gh:SQ.") + + def test_vrs_functions_signatures(self): + """Test that VRS functions have the expected signatures.""" + import inspect + + from gnomad.utils.annotations import add_gks_va, add_gks_vrs + + # Check add_gks_vrs signature + sig = inspect.signature(add_gks_vrs) + assert len(sig.parameters) == 2 + assert "input_locus" in sig.parameters + assert "input_vrs" in sig.parameters + + # Check add_gks_va signature + sig = inspect.signature(add_gks_va) + assert len(sig.parameters) == 7 # All parameters including defaults + assert "input_struct" in sig.parameters + assert "label_name" in sig.parameters + assert "label_version" in sig.parameters + assert "gen_anc_groups" in sig.parameters + assert "gen_anc_groups_dict" in sig.parameters + assert "by_sex" in sig.parameters + assert "freq_index_dict" in sig.parameters + + def test_vrs_version_compatibility(self): + """Test that VRS version compatibility check works.""" + import ga4gh.core as ga4gh_core + + # Test that the compatibility check works + has_ga4gh_identify = hasattr(ga4gh_core, "ga4gh_identify") + assert isinstance(has_ga4gh_identify, bool) + + # The function should work regardless of the VRS version + from gnomad.utils.annotations import add_gks_vrs + + assert callable(add_gks_vrs) + + def test_vrs_chromosome_id_validation(self): + """Test that VRS chromosome IDs are valid GA4GH identifiers.""" + import re + + from gnomad.utils.annotations import VRS_CHROM_IDS + + # GA4GH identifier pattern: ga4gh:SQ. + ga4gh_pattern = re.compile(r"^ga4gh:SQ\.[A-Za-z0-9_-]+$") + + for build, chrom_dict in VRS_CHROM_IDS.items(): + for chrom, identifier in chrom_dict.items(): + assert ga4gh_pattern.match( + identifier + ), f"Invalid GA4GH identifier: {identifier}" + + def test_vrs_chromosome_coverage(self): + """Test that all expected chromosomes are covered.""" + from gnomad.utils.annotations import VRS_CHROM_IDS + + # Check GRCh38 coverage + grch38_chroms = VRS_CHROM_IDS["GRCh38"] + expected_grch38 = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"] + + for chrom in expected_grch38: + assert chrom in grch38_chroms, f"Missing chromosome {chrom} in GRCh38" + + # Check GRCh37 coverage + grch37_chroms = VRS_CHROM_IDS["GRCh37"] + expected_grch37 = [str(i) for i in range(1, 23)] + ["X", "Y"] + + for chrom in expected_grch37: + assert chrom in grch37_chroms, f"Missing chromosome {chrom} in GRCh37" + + def test_vrs_function_docstrings(self): + """Test that VRS functions have proper docstrings.""" + from gnomad.utils.annotations import add_gks_va, add_gks_vrs + + # Check add_gks_vrs docstring + assert add_gks_vrs.__doc__ is not None + assert "VRS" in add_gks_vrs.__doc__ + assert "GA4GH" in add_gks_vrs.__doc__ + + # Check add_gks_va docstring + assert add_gks_va.__doc__ is not None + assert "GKS VA" in add_gks_va.__doc__ + assert "frequency" in add_gks_va.__doc__ + + def test_vrs_function_return_types(self): + """Test that VRS functions return the expected types.""" + import inspect + + from gnomad.utils.annotations import add_gks_va, add_gks_vrs + + # Check return type annotations + sig = inspect.signature(add_gks_vrs) + assert sig.return_annotation == dict + + sig = inspect.signature(add_gks_va) + assert sig.return_annotation == dict + + def test_vrs_imports_work(self): + """Test that VRS-related imports work without errors.""" + try: + import ga4gh.core as ga4gh_core + import ga4gh.vrs as ga4gh_vrs + + assert True # If we get here, imports worked + except ImportError as e: + pytest.fail(f"VRS imports failed: {e}") + + def test_vrs_identifier_generation(self): + """Test that GA4GH identifiers are generated correctly.""" + import ga4gh.core as ga4gh_core + import ga4gh.vrs as ga4gh_vrs + + # Test that we can import and access VRS modules + assert hasattr(ga4gh_core, "__version__") or hasattr(ga4gh_core, "_internal") + assert hasattr(ga4gh_vrs, "models") + assert hasattr(ga4gh_vrs.models, "SequenceLocation") + + # Test that identifier generation function exists (either version) + has_new_api = hasattr(ga4gh_core, "ga4gh_identify") + has_old_api = hasattr(ga4gh_core, "_internal") and hasattr( + ga4gh_core._internal, "identifiers" + ) + + assert has_new_api or has_old_api, "No GA4GH identifier function found" + + # Test that we can create a basic VRS object (without complex validation) + try: + # Try the old API format which should work with VRS 0.8.4 + location_dict = { + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.test", + "interval": { + "start": {"type": "Number", "value": 1}, + "end": {"type": "Number", "value": 2}, + "type": "SequenceInterval", + }, + } + seq_loc = ga4gh_vrs.models.SequenceLocation(**location_dict) + assert seq_loc is not None + except Exception: + # If that fails, just verify the modules are accessible + assert "ga4gh" in str(type(ga4gh_core)) + assert "ga4gh" in str(type(ga4gh_vrs)) + + def test_vrs_error_handling(self): + """Test that VRS functions handle errors gracefully.""" + from gnomad.utils.annotations import add_gks_vrs + + # Test with invalid locus (should raise appropriate error) + with pytest.raises((AttributeError, TypeError)): + # This should fail because we're not providing proper Hail locus objects + add_gks_vrs("invalid_locus", "invalid_vrs") + + def test_vrs_function_consistency(self): + """Test that VRS functions are consistent across calls.""" + from gnomad.utils.annotations import add_gks_va, add_gks_vrs + + # Test that functions are callable and don't change between calls + assert callable(add_gks_vrs) + assert callable(add_gks_va) + + # Test that function objects are stable + func1 = add_gks_vrs + func2 = add_gks_vrs + assert func1 is func2 + + def test_vrs_parameter_defaults(self): + """Test that VRS function parameters have expected defaults.""" + import inspect + + from gnomad.utils.annotations import add_gks_va + + sig = inspect.signature(add_gks_va) + + # Check default values + assert sig.parameters["label_name"].default == "gnomAD" + assert sig.parameters["label_version"].default == "3.1.2" + assert sig.parameters["gen_anc_groups"].default is None + assert sig.parameters["gen_anc_groups_dict"].default is None + assert sig.parameters["by_sex"].default is False + assert sig.parameters["freq_index_dict"].default is None From a5218ca8068d619847c39f6e008f728e11156a40 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 5 Aug 2025 21:00:48 -0400 Subject: [PATCH 03/10] Reduce redundant and excessive tests for vrs --- gnomad/utils/annotations.py | 27 ++++- requirements.txt | 2 +- tests/utils/test_annotations.py | 195 ++++++-------------------------- 3 files changed, 59 insertions(+), 165 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index c0559e303..ccf763a4d 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -2675,12 +2675,27 @@ def add_gks_vrs( # Check VRS version and use appropriate API if hasattr(ga4gh_core, "ga4gh_identify"): # VRS 2.0.0+ API - seq_loc = ga4gh_vrs.models.SequenceLocation( - sequenceReference=vrs_chrom_id, - start=vrs_start_value, - end=vrs_end_value, - ) - location_id = ga4gh_core.ga4gh_identify(seq_loc) + try: + # Extract the SQ. part from the full ga4gh:SQ. identifier + refget_accession = ( + vrs_chrom_id.split("ga4gh:")[1] + if "ga4gh:" in vrs_chrom_id + else vrs_chrom_id + ) + seq_ref = ga4gh_vrs.models.SequenceReference( + refgetAccession=refget_accession + ) + seq_loc = ga4gh_vrs.models.SequenceLocation( + sequenceReference=seq_ref, + start=vrs_start_value, + end=vrs_end_value, + ) + location_id = ga4gh_core.ga4gh_identify(seq_loc) + except AttributeError: + # Fallback to VRS 0.8.4 API if SequenceReference doesn't exist + location_id = ga4gh_core._internal.identifiers.ga4gh_identify( + ga4gh_vrs.models.SequenceLocation(**vrs_dict_out["location"]) + ) else: # VRS 0.8.4 API location_id = ga4gh_core._internal.identifiers.ga4gh_identify( diff --git a/requirements.txt b/requirements.txt index 449d05a06..3915ad76f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ annoy -ga4gh.vrs[extras]==2.0.0 +ga4gh.vrs[extras]==2.0.1 hail hdbscan ipywidgets diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 5e99b1b02..176fd9e1f 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -1099,124 +1099,6 @@ def test_add_gks_vrs_import(self): assert callable(add_gks_vrs) assert callable(add_gks_va) - def test_vrs_chromosome_ids_available(self): - """Test that VRS chromosome IDs are available.""" - from gnomad.utils.annotations import VRS_CHROM_IDS - - # Check that VRS_CHROM_IDS contains expected builds - assert "GRCh38" in VRS_CHROM_IDS - assert "GRCh37" in VRS_CHROM_IDS - - # Check that chromosome mappings exist - grch38_chroms = VRS_CHROM_IDS["GRCh38"] - grch37_chroms = VRS_CHROM_IDS["GRCh37"] - - # Check some common chromosomes - assert "chr1" in grch38_chroms - assert "chrX" in grch38_chroms - assert "1" in grch37_chroms - assert "X" in grch37_chroms - - # Check that IDs are in the expected format - assert grch38_chroms["chr1"].startswith("ga4gh:SQ.") - assert grch37_chroms["1"].startswith("ga4gh:SQ.") - - def test_vrs_functions_signatures(self): - """Test that VRS functions have the expected signatures.""" - import inspect - - from gnomad.utils.annotations import add_gks_va, add_gks_vrs - - # Check add_gks_vrs signature - sig = inspect.signature(add_gks_vrs) - assert len(sig.parameters) == 2 - assert "input_locus" in sig.parameters - assert "input_vrs" in sig.parameters - - # Check add_gks_va signature - sig = inspect.signature(add_gks_va) - assert len(sig.parameters) == 7 # All parameters including defaults - assert "input_struct" in sig.parameters - assert "label_name" in sig.parameters - assert "label_version" in sig.parameters - assert "gen_anc_groups" in sig.parameters - assert "gen_anc_groups_dict" in sig.parameters - assert "by_sex" in sig.parameters - assert "freq_index_dict" in sig.parameters - - def test_vrs_version_compatibility(self): - """Test that VRS version compatibility check works.""" - import ga4gh.core as ga4gh_core - - # Test that the compatibility check works - has_ga4gh_identify = hasattr(ga4gh_core, "ga4gh_identify") - assert isinstance(has_ga4gh_identify, bool) - - # The function should work regardless of the VRS version - from gnomad.utils.annotations import add_gks_vrs - - assert callable(add_gks_vrs) - - def test_vrs_chromosome_id_validation(self): - """Test that VRS chromosome IDs are valid GA4GH identifiers.""" - import re - - from gnomad.utils.annotations import VRS_CHROM_IDS - - # GA4GH identifier pattern: ga4gh:SQ. - ga4gh_pattern = re.compile(r"^ga4gh:SQ\.[A-Za-z0-9_-]+$") - - for build, chrom_dict in VRS_CHROM_IDS.items(): - for chrom, identifier in chrom_dict.items(): - assert ga4gh_pattern.match( - identifier - ), f"Invalid GA4GH identifier: {identifier}" - - def test_vrs_chromosome_coverage(self): - """Test that all expected chromosomes are covered.""" - from gnomad.utils.annotations import VRS_CHROM_IDS - - # Check GRCh38 coverage - grch38_chroms = VRS_CHROM_IDS["GRCh38"] - expected_grch38 = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"] - - for chrom in expected_grch38: - assert chrom in grch38_chroms, f"Missing chromosome {chrom} in GRCh38" - - # Check GRCh37 coverage - grch37_chroms = VRS_CHROM_IDS["GRCh37"] - expected_grch37 = [str(i) for i in range(1, 23)] + ["X", "Y"] - - for chrom in expected_grch37: - assert chrom in grch37_chroms, f"Missing chromosome {chrom} in GRCh37" - - def test_vrs_function_docstrings(self): - """Test that VRS functions have proper docstrings.""" - from gnomad.utils.annotations import add_gks_va, add_gks_vrs - - # Check add_gks_vrs docstring - assert add_gks_vrs.__doc__ is not None - assert "VRS" in add_gks_vrs.__doc__ - assert "GA4GH" in add_gks_vrs.__doc__ - - # Check add_gks_va docstring - assert add_gks_va.__doc__ is not None - assert "GKS VA" in add_gks_va.__doc__ - assert "frequency" in add_gks_va.__doc__ - - def test_vrs_function_return_types(self): - """Test that VRS functions return the expected types.""" - import inspect - - from gnomad.utils.annotations import add_gks_va, add_gks_vrs - - # Check return type annotations - sig = inspect.signature(add_gks_vrs) - assert sig.return_annotation == dict - - sig = inspect.signature(add_gks_va) - assert sig.return_annotation == dict - def test_vrs_imports_work(self): """Test that VRS-related imports work without errors.""" try: @@ -1247,22 +1129,35 @@ def test_vrs_identifier_generation(self): # Test that we can create a basic VRS object (without complex validation) try: - # Try the old API format which should work with VRS 0.8.4 - location_dict = { - "type": "SequenceLocation", - "sequence_id": "ga4gh:SQ.test", - "interval": { - "start": {"type": "Number", "value": 1}, - "end": {"type": "Number", "value": 2}, - "type": "SequenceInterval", - }, - } - seq_loc = ga4gh_vrs.models.SequenceLocation(**location_dict) + # Try the VRS 2.0.1 API format + seq_loc = ga4gh_vrs.models.SequenceLocation( + sequenceReference="ga4gh:SQ.test", + start=1, + end=2, + ) assert seq_loc is not None - except Exception: - # If that fails, just verify the modules are accessible - assert "ga4gh" in str(type(ga4gh_core)) - assert "ga4gh" in str(type(ga4gh_vrs)) + print("VRS 2.0.1 API test successful") + except Exception as e: + print(f"VRS 2.0.1 API test failed: {e}") + try: + # Try the old API format which should work with VRS 0.8.4 + location_dict = { + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.test", + "interval": { + "start": {"type": "Number", "value": 1}, + "end": {"type": "Number", "value": 2}, + "type": "SequenceInterval", + }, + } + seq_loc = ga4gh_vrs.models.SequenceLocation(**location_dict) + assert seq_loc is not None + print("VRS 0.8.4 API test successful") + except Exception as e2: + print(f"VRS 0.8.4 API test failed: {e2}") + # If both fail, just verify the modules are accessible + assert "ga4gh" in str(type(ga4gh_core)) + assert "ga4gh" in str(type(ga4gh_vrs)) def test_vrs_error_handling(self): """Test that VRS functions handle errors gracefully.""" @@ -1273,31 +1168,15 @@ def test_vrs_error_handling(self): # This should fail because we're not providing proper Hail locus objects add_gks_vrs("invalid_locus", "invalid_vrs") - def test_vrs_function_consistency(self): - """Test that VRS functions are consistent across calls.""" - from gnomad.utils.annotations import add_gks_va, add_gks_vrs - - # Test that functions are callable and don't change between calls - assert callable(add_gks_vrs) - assert callable(add_gks_va) - - # Test that function objects are stable - func1 = add_gks_vrs - func2 = add_gks_vrs - assert func1 is func2 - - def test_vrs_parameter_defaults(self): - """Test that VRS function parameters have expected defaults.""" - import inspect + def test_vrs_version_compatibility(self): + """Test that VRS version compatibility check works.""" + import ga4gh.core as ga4gh_core - from gnomad.utils.annotations import add_gks_va + # Test that the compatibility check works + has_ga4gh_identify = hasattr(ga4gh_core, "ga4gh_identify") + assert isinstance(has_ga4gh_identify, bool) - sig = inspect.signature(add_gks_va) + # The function should work regardless of the VRS version + from gnomad.utils.annotations import add_gks_vrs - # Check default values - assert sig.parameters["label_name"].default == "gnomAD" - assert sig.parameters["label_version"].default == "3.1.2" - assert sig.parameters["gen_anc_groups"].default is None - assert sig.parameters["gen_anc_groups_dict"].default is None - assert sig.parameters["by_sex"].default is False - assert sig.parameters["freq_index_dict"].default is None + assert callable(add_gks_vrs) From e1c5dee51ef5b654cbfd2e1c9cdd47bf73a6c726 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 22 Aug 2025 13:35:44 -0400 Subject: [PATCH 04/10] Drop 0.8.4 support since we can only pin a single version, add more tests --- gnomad/utils/annotations.py | 41 +++------- tests/utils/test_annotations.py | 134 +++++++++++++++++++++----------- 2 files changed, 102 insertions(+), 73 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index ccf763a4d..60ae62264 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -2672,35 +2672,18 @@ def add_gks_vrs( "state": {"type": "LiteralSequenceExpression", "sequence": vrs_state_sequence}, } - # Check VRS version and use appropriate API - if hasattr(ga4gh_core, "ga4gh_identify"): - # VRS 2.0.0+ API - try: - # Extract the SQ. part from the full ga4gh:SQ. identifier - refget_accession = ( - vrs_chrom_id.split("ga4gh:")[1] - if "ga4gh:" in vrs_chrom_id - else vrs_chrom_id - ) - seq_ref = ga4gh_vrs.models.SequenceReference( - refgetAccession=refget_accession - ) - seq_loc = ga4gh_vrs.models.SequenceLocation( - sequenceReference=seq_ref, - start=vrs_start_value, - end=vrs_end_value, - ) - location_id = ga4gh_core.ga4gh_identify(seq_loc) - except AttributeError: - # Fallback to VRS 0.8.4 API if SequenceReference doesn't exist - location_id = ga4gh_core._internal.identifiers.ga4gh_identify( - ga4gh_vrs.models.SequenceLocation(**vrs_dict_out["location"]) - ) - else: - # VRS 0.8.4 API - location_id = ga4gh_core._internal.identifiers.ga4gh_identify( - ga4gh_vrs.models.SequenceLocation(**vrs_dict_out["location"]) - ) + # Use VRS 2.0.1+ API + # Extract the SQ. part from the full ga4gh:SQ. identifier + refget_accession = ( + vrs_chrom_id.split("ga4gh:")[1] if "ga4gh:" in vrs_chrom_id else vrs_chrom_id + ) + seq_ref = ga4gh_vrs.models.SequenceReference(refgetAccession=refget_accession) + seq_loc = ga4gh_vrs.models.SequenceLocation( + sequenceReference=seq_ref, + start=vrs_start_value, + end=vrs_end_value, + ) + location_id = ga4gh_core.ga4gh_identify(seq_loc) vrs_dict_out["location"]["_id"] = location_id diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 176fd9e1f..9bf511617 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -1115,49 +1115,22 @@ def test_vrs_identifier_generation(self): import ga4gh.vrs as ga4gh_vrs # Test that we can import and access VRS modules - assert hasattr(ga4gh_core, "__version__") or hasattr(ga4gh_core, "_internal") + assert hasattr(ga4gh_core, "__version__") assert hasattr(ga4gh_vrs, "models") assert hasattr(ga4gh_vrs.models, "SequenceLocation") - # Test that identifier generation function exists (either version) - has_new_api = hasattr(ga4gh_core, "ga4gh_identify") - has_old_api = hasattr(ga4gh_core, "_internal") and hasattr( - ga4gh_core._internal, "identifiers" - ) - - assert has_new_api or has_old_api, "No GA4GH identifier function found" + # Test that VRS 2.0.1+ API is available + assert hasattr( + ga4gh_core, "ga4gh_identify" + ), "VRS 2.0.1+ ga4gh_identify function not found" - # Test that we can create a basic VRS object (without complex validation) - try: - # Try the VRS 2.0.1 API format - seq_loc = ga4gh_vrs.models.SequenceLocation( - sequenceReference="ga4gh:SQ.test", - start=1, - end=2, - ) - assert seq_loc is not None - print("VRS 2.0.1 API test successful") - except Exception as e: - print(f"VRS 2.0.1 API test failed: {e}") - try: - # Try the old API format which should work with VRS 0.8.4 - location_dict = { - "type": "SequenceLocation", - "sequence_id": "ga4gh:SQ.test", - "interval": { - "start": {"type": "Number", "value": 1}, - "end": {"type": "Number", "value": 2}, - "type": "SequenceInterval", - }, - } - seq_loc = ga4gh_vrs.models.SequenceLocation(**location_dict) - assert seq_loc is not None - print("VRS 0.8.4 API test successful") - except Exception as e2: - print(f"VRS 0.8.4 API test failed: {e2}") - # If both fail, just verify the modules are accessible - assert "ga4gh" in str(type(ga4gh_core)) - assert "ga4gh" in str(type(ga4gh_vrs)) + # Test that we can create a VRS object using the 2.0.1+ API + seq_loc = ga4gh_vrs.models.SequenceLocation( + sequenceReference="ga4gh:SQ.test", + start=1, + end=2, + ) + assert seq_loc is not None def test_vrs_error_handling(self): """Test that VRS functions handle errors gracefully.""" @@ -1168,15 +1141,88 @@ def test_vrs_error_handling(self): # This should fail because we're not providing proper Hail locus objects add_gks_vrs("invalid_locus", "invalid_vrs") + def test_add_gks_vrs_actual_api_call(self): + """Test that add_gks_vrs actually calls the VRS 2.0.1 API with real data.""" + import hail as hl + + from gnomad.utils.annotations import VRS_CHROM_IDS, add_gks_vrs + + # Create a real Hail locus and VRS struct that would come from actual data + locus = hl.locus("chr1", 100, reference_genome="GRCh38") + + # Create a VRS struct that mimics what would be in actual gnomAD data + vrs_struct = hl.struct( + VRS_Allele_IDs=["test_ref_id", "ga4gh:VA.test_var_id"], + VRS_Starts=[99, 99], # 0-based coordinates + VRS_Ends=[100, 100], + VRS_States=["A", "T"], # ref, alt + ) + + # Evaluate to get actual Python objects + locus_py = hl.eval(locus) + vrs_py = hl.eval(vrs_struct) + + # This should actually call ga4gh_core.ga4gh_identify() + result = add_gks_vrs(locus_py, vrs_py) + + # Verify the result has the expected VRS structure + assert isinstance(result, dict) + assert result["type"] == "Allele" + assert "location" in result + assert result["location"]["type"] == "SequenceLocation" + assert "_id" in result["location"] # This comes from ga4gh_identify() call + assert "state" in result + assert result["state"]["type"] == "LiteralSequenceExpression" + assert result["state"]["sequence"] == "T" + + # Verify the chromosome ID matches our VRS_CHROM_IDS mapping + expected_chr1_id = VRS_CHROM_IDS["GRCh38"]["chr1"] + assert result["location"]["sequence_id"] == expected_chr1_id + assert expected_chr1_id == "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO" + + # Verify the location ID was generated by the VRS API + location_id = result["location"]["_id"] + assert isinstance(location_id, str) + assert location_id.startswith("ga4gh:") # VRS identifiers start with ga4gh: + + def test_add_gks_vrs_grch37_chromosome_mapping(self): + """Test that VRS chromosome mapping works correctly for GRCh37.""" + import hail as hl + + from gnomad.utils.annotations import VRS_CHROM_IDS, add_gks_vrs + + # Test GRCh37 (note different chromosome naming: "1" vs "chr1") + locus_grch37 = hl.locus("1", 100, reference_genome="GRCh37") + vrs_struct = hl.struct( + VRS_Allele_IDs=["test_ref_id", "ga4gh:VA.test_var_id"], + VRS_Starts=[99, 99], + VRS_Ends=[100, 100], + VRS_States=["A", "T"], + ) + + locus_py = hl.eval(locus_grch37) + vrs_py = hl.eval(vrs_struct) + + result = add_gks_vrs(locus_py, vrs_py) + + # Verify GRCh37 chromosome 1 mapping + expected_chr1_grch37_id = VRS_CHROM_IDS["GRCh37"]["1"] + assert result["location"]["sequence_id"] == expected_chr1_grch37_id + assert expected_chr1_grch37_id == "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU" + + # Verify the API was called and generated a proper identifier + location_id = result["location"]["_id"] + assert isinstance(location_id, str) + assert location_id.startswith("ga4gh:") + def test_vrs_version_compatibility(self): - """Test that VRS version compatibility check works.""" + """Test that VRS 2.0.1+ is properly installed.""" import ga4gh.core as ga4gh_core - # Test that the compatibility check works - has_ga4gh_identify = hasattr(ga4gh_core, "ga4gh_identify") - assert isinstance(has_ga4gh_identify, bool) + # Test that VRS 2.0.1+ API is available + assert hasattr(ga4gh_core, "ga4gh_identify"), "VRS 2.0.1+ is required" - # The function should work regardless of the VRS version + # The function should work with VRS 2.0.1+ from gnomad.utils.annotations import add_gks_vrs assert callable(add_gks_vrs) From 685dc67d8cc7dd89485f2ddb83e461b77cbdb99d Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 22 Aug 2025 13:46:57 -0400 Subject: [PATCH 05/10] Drop ga4gh prefix from vrs_chrom_ids --- gnomad/utils/annotations.py | 103 +++++++++++++++----------------- tests/utils/test_annotations.py | 4 +- 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index 60ae62264..f4b89459e 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -36,56 +36,56 @@ VRS_CHROM_IDS = { "GRCh38": { - "chr1": "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", - "chr2": "ga4gh:SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", - "chr3": "ga4gh:SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", - "chr4": "ga4gh:SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc", - "chr5": "ga4gh:SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI", - "chr6": "ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV", - "chr7": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", - "chr8": "ga4gh:SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs", - "chr9": "ga4gh:SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI", - "chr10": "ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", - "chr11": "ga4gh:SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1", - "chr12": "ga4gh:SQ.6wlJpONE3oNb4D69ULmEXhqyDZ4vwNfl", - "chr13": "ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT", - "chr14": "ga4gh:SQ.eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm", - "chr15": "ga4gh:SQ.AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6", - "chr16": "ga4gh:SQ.yC_0RBj3fgBlvgyAuycbzdubtLxq-rE0", - "chr17": "ga4gh:SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", - "chr18": "ga4gh:SQ.vWwFhJ5lQDMhh-czg06YtlWqu0lvFAZV", - "chr19": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", - "chr20": "ga4gh:SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo", - "chr21": "ga4gh:SQ.5ZUqxCmDDgN4xTRbaSjN8LwgZironmB8", - "chr22": "ga4gh:SQ.7B7SHsmchAR0dFcDCuSFjJAo7tX87krQ", - "chrX": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", - "chrY": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "chr1": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", + "chr2": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + "chr3": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + "chr4": "SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc", + "chr5": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI", + "chr6": "SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV", + "chr7": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "chr8": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs", + "chr9": "SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI", + "chr10": "SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", + "chr11": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1", + "chr12": "SQ.6wlJpONE3oNb4D69ULmEXhqyDZ4vwNfl", + "chr13": "SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT", + "chr14": "SQ.eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm", + "chr15": "SQ.AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6", + "chr16": "SQ.yC_0RBj3fgBlvgyAuycbzdubtLxq-rE0", + "chr17": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", + "chr18": "SQ.vWwFhJ5lQDMhh-czg06YtlWqu0lvFAZV", + "chr19": "SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "chr20": "SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo", + "chr21": "SQ.5ZUqxCmDDgN4xTRbaSjN8LwgZironmB8", + "chr22": "SQ.7B7SHsmchAR0dFcDCuSFjJAo7tX87krQ", + "chrX": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "chrY": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", }, "GRCh37": { - "1": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", - "2": "ga4gh:SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO", - "3": "ga4gh:SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", - "4": "ga4gh:SQ.iy7Zfceb5_VGtTQzJ-v5JpPbpeifHD_V", - "5": "ga4gh:SQ.vbjOdMfHJvTjK_nqvFvpaSKhZillW0SX", - "6": "ga4gh:SQ.KqaUhJMW3CDjhoVtBetdEKT1n6hM-7Ek", - "7": "ga4gh:SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", - "8": "ga4gh:SQ.tTm7wmhz0G4lpt8wPspcNkAD_qiminj6", - "9": "ga4gh:SQ.HBckYGQ4wYG9APHLpjoQ9UUe9v7NxExt", - "10": "ga4gh:SQ.-BOZ8Esn8J88qDwNiSEwUr5425UXdiGX", - "11": "ga4gh:SQ.XXi2_O1ly-CCOi3HP5TypAw7LtC6niFG", - "12": "ga4gh:SQ.105bBysLoDFQHhajooTAUyUkNiZ8LJEH", - "13": "ga4gh:SQ.Ewb9qlgTqN6e_XQiRVYpoUfZJHXeiUfH", - "14": "ga4gh:SQ.5Ji6FGEKfejK1U6BMScqrdKJK8GqmIGf", - "15": "ga4gh:SQ.zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt", - "16": "ga4gh:SQ.W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb", - "17": "ga4gh:SQ.AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz", - "18": "ga4gh:SQ.BTj4BDaaHYoPhD3oY2GdwC_l0uqZ92UD", - "19": "ga4gh:SQ.ItRDD47aMoioDCNW_occY5fWKZBKlxCX", - "20": "ga4gh:SQ.iy_UbUrvECxFRX5LPTH_KPojdlT7BKsf", - "21": "ga4gh:SQ.LpTaNW-hwuY_yARP0rtarCnpCQLkgVCg", - "22": "ga4gh:SQ.XOgHwwR3Upfp5sZYk6ZKzvV25a4RBVu8", - "X": "ga4gh:SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", - "Y": "ga4gh:SQ.BT7QyW5iXaX_1PSX-msSGYsqRdMKqkj-", + "1": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + "2": "SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO", + "3": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + "4": "SQ.iy7Zfceb5_VGtTQzJ-v5JpPbpeifHD_V", + "5": "SQ.vbjOdMfHJvTjK_nqvFvpaSKhZillW0SX", + "6": "SQ.KqaUhJMW3CDjhoVtBetdEKT1n6hM-7Ek", + "7": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + "8": "SQ.tTm7wmhz0G4lpt8wPspcNkAD_qiminj6", + "9": "SQ.HBckYGQ4wYG9APHLpjoQ9UUe9v7NxExt", + "10": "SQ.-BOZ8Esn8J88qDwNiSEwUr5425UXdiGX", + "11": "SQ.XXi2_O1ly-CCOi3HP5TypAw7LtC6niFG", + "12": "SQ.105bBysLoDFQHhajooTAUyUkNiZ8LJEH", + "13": "SQ.Ewb9qlgTqN6e_XQiRVYpoUfZJHXeiUfH", + "14": "SQ.5Ji6FGEKfejK1U6BMScqrdKJK8GqmIGf", + "15": "SQ.zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt", + "16": "SQ.W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb", + "17": "SQ.AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz", + "18": "SQ.BTj4BDaaHYoPhD3oY2GdwC_l0uqZ92UD", + "19": "SQ.ItRDD47aMoioDCNW_occY5fWKZBKlxCX", + "20": "SQ.iy_UbUrvECxFRX5LPTH_KPojdlT7BKsf", + "21": "SQ.LpTaNW-hwuY_yARP0rtarCnpCQLkgVCg", + "22": "SQ.XOgHwwR3Upfp5sZYk6ZKzvV25a4RBVu8", + "X": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + "Y": "SQ.BT7QyW5iXaX_1PSX-msSGYsqRdMKqkj-", }, } @@ -2672,12 +2672,7 @@ def add_gks_vrs( "state": {"type": "LiteralSequenceExpression", "sequence": vrs_state_sequence}, } - # Use VRS 2.0.1+ API - # Extract the SQ. part from the full ga4gh:SQ. identifier - refget_accession = ( - vrs_chrom_id.split("ga4gh:")[1] if "ga4gh:" in vrs_chrom_id else vrs_chrom_id - ) - seq_ref = ga4gh_vrs.models.SequenceReference(refgetAccession=refget_accession) + seq_ref = ga4gh_vrs.models.SequenceReference(refgetAccession=vrs_chrom_id) seq_loc = ga4gh_vrs.models.SequenceLocation( sequenceReference=seq_ref, start=vrs_start_value, diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 9bf511617..b685ea02c 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -1178,7 +1178,7 @@ def test_add_gks_vrs_actual_api_call(self): # Verify the chromosome ID matches our VRS_CHROM_IDS mapping expected_chr1_id = VRS_CHROM_IDS["GRCh38"]["chr1"] assert result["location"]["sequence_id"] == expected_chr1_id - assert expected_chr1_id == "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO" + assert expected_chr1_id == "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO" # Verify the location ID was generated by the VRS API location_id = result["location"]["_id"] @@ -1208,7 +1208,7 @@ def test_add_gks_vrs_grch37_chromosome_mapping(self): # Verify GRCh37 chromosome 1 mapping expected_chr1_grch37_id = VRS_CHROM_IDS["GRCh37"]["1"] assert result["location"]["sequence_id"] == expected_chr1_grch37_id - assert expected_chr1_grch37_id == "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU" + assert expected_chr1_grch37_id == "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU" # Verify the API was called and generated a proper identifier location_id = result["location"]["_id"] From abdbf1eaa89a6ba0632ce01daf4e095b9617713e Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 22 Aug 2025 15:06:24 -0400 Subject: [PATCH 06/10] Update VRS test imports --- tests/utils/test_annotations.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index b685ea02c..e1973112c 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -6,6 +6,7 @@ import pytest from gnomad.utils.annotations import ( + VRS_CHROM_IDS, add_gks_va, add_gks_vrs, fill_missing_key_combinations, @@ -1094,16 +1095,14 @@ class TestVRSFunctions: def test_add_gks_vrs_import(self): """Test that the VRS functions can be imported successfully.""" - from gnomad.utils.annotations import add_gks_va, add_gks_vrs - assert callable(add_gks_vrs) assert callable(add_gks_va) def test_vrs_imports_work(self): """Test that VRS-related imports work without errors.""" try: - import ga4gh.core as ga4gh_core - import ga4gh.vrs as ga4gh_vrs + import ga4gh.core # noqa: F401 + import ga4gh.vrs # noqa: F401 assert True # If we get here, imports worked except ImportError as e: @@ -1134,8 +1133,6 @@ def test_vrs_identifier_generation(self): def test_vrs_error_handling(self): """Test that VRS functions handle errors gracefully.""" - from gnomad.utils.annotations import add_gks_vrs - # Test with invalid locus (should raise appropriate error) with pytest.raises((AttributeError, TypeError)): # This should fail because we're not providing proper Hail locus objects @@ -1143,10 +1140,6 @@ def test_vrs_error_handling(self): def test_add_gks_vrs_actual_api_call(self): """Test that add_gks_vrs actually calls the VRS 2.0.1 API with real data.""" - import hail as hl - - from gnomad.utils.annotations import VRS_CHROM_IDS, add_gks_vrs - # Create a real Hail locus and VRS struct that would come from actual data locus = hl.locus("chr1", 100, reference_genome="GRCh38") @@ -1187,10 +1180,6 @@ def test_add_gks_vrs_actual_api_call(self): def test_add_gks_vrs_grch37_chromosome_mapping(self): """Test that VRS chromosome mapping works correctly for GRCh37.""" - import hail as hl - - from gnomad.utils.annotations import VRS_CHROM_IDS, add_gks_vrs - # Test GRCh37 (note different chromosome naming: "1" vs "chr1") locus_grch37 = hl.locus("1", 100, reference_genome="GRCh37") vrs_struct = hl.struct( @@ -1223,6 +1212,4 @@ def test_vrs_version_compatibility(self): assert hasattr(ga4gh_core, "ga4gh_identify"), "VRS 2.0.1+ is required" # The function should work with VRS 2.0.1+ - from gnomad.utils.annotations import add_gks_vrs - assert callable(add_gks_vrs) From 80ca8fcdf4be4aaa89b0931448dcdc261587fc8d Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Fri, 22 Aug 2025 15:24:57 -0400 Subject: [PATCH 07/10] Drop overly simple import test --- tests/utils/test_annotations.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index e5d5889b0..7251c269b 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -1094,21 +1094,6 @@ def test_merge_histograms_sum_with_negatives_error(self, sample_ht): class TestVRSFunctions: """Test the VRS-related functions.""" - def test_add_gks_vrs_import(self): - """Test that the VRS functions can be imported successfully.""" - assert callable(add_gks_vrs) - assert callable(add_gks_va) - - def test_vrs_imports_work(self): - """Test that VRS-related imports work without errors.""" - try: - import ga4gh.core # noqa: F401 - import ga4gh.vrs # noqa: F401 - - assert True # If we get here, imports worked - except ImportError as e: - pytest.fail(f"VRS imports failed: {e}") - def test_vrs_identifier_generation(self): """Test that GA4GH identifiers are generated correctly.""" import ga4gh.core as ga4gh_core From c3f7aec0e435b69fa68ed18e769a83eeddb94b5b Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 16 Sep 2025 14:41:23 -0400 Subject: [PATCH 08/10] Remove redundant imports from gks tests --- gnomad/utils/annotations.py | 4 +--- tests/utils/test_annotations.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index 30efe6801..c5c12cb91 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -2864,9 +2864,7 @@ def _create_group_dicts( ancillaryResults["grpMaxFAF95"] = { "frequency": input_struct.grpMaxFAF95.grpmax_gen_anc, "confidenceInterval": 0.95, - "groupId": ( - f"{gnomad_id}.{input_struct.grpMaxFAF95.grpmax_gen_anc.upper()}" - ), + "groupId": f"{gnomad_id}.{input_struct.grpMaxFAF95.grpmax_gen_anc.upper()}", } # Add joint group max FAF if it exists. diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 7251c269b..4fb2a8d96 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -1096,21 +1096,18 @@ class TestVRSFunctions: def test_vrs_identifier_generation(self): """Test that GA4GH identifiers are generated correctly.""" - import ga4gh.core as ga4gh_core - import ga4gh.vrs as ga4gh_vrs - # Test that we can import and access VRS modules - assert hasattr(ga4gh_core, "__version__") - assert hasattr(ga4gh_vrs, "models") - assert hasattr(ga4gh_vrs.models, "SequenceLocation") + assert hasattr(ga4gh.core, "__version__") + assert hasattr(ga4gh.vrs, "models") + assert hasattr(ga4gh.vrs.models, "SequenceLocation") # Test that VRS 2.0.1+ API is available assert hasattr( - ga4gh_core, "ga4gh_identify" + ga4gh.core, "ga4gh_identify" ), "VRS 2.0.1+ ga4gh_identify function not found" # Test that we can create a VRS object using the 2.0.1+ API - seq_loc = ga4gh_vrs.models.SequenceLocation( + seq_loc = ga4gh.vrs.models.SequenceLocation( sequenceReference="ga4gh:SQ.test", start=1, end=2, @@ -1141,7 +1138,6 @@ def test_add_gks_vrs_actual_api_call(self): locus_py = hl.eval(locus) vrs_py = hl.eval(vrs_struct) - # This should actually call ga4gh_core.ga4gh_identify() result = add_gks_vrs(locus_py, vrs_py) # Verify the result has the expected VRS structure @@ -1192,10 +1188,8 @@ def test_add_gks_vrs_grch37_chromosome_mapping(self): def test_vrs_version_compatibility(self): """Test that VRS 2.0.1+ is properly installed.""" - import ga4gh.core as ga4gh_core - # Test that VRS 2.0.1+ API is available - assert hasattr(ga4gh_core, "ga4gh_identify"), "VRS 2.0.1+ is required" + assert hasattr(ga4gh.core, "ga4gh_identify"), "VRS 2.0.1+ is required" # The function should work with VRS 2.0.1+ assert callable(add_gks_vrs) From 3c2736010a18719017554e43ae086439ac50a507 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 16 Sep 2025 14:54:48 -0400 Subject: [PATCH 09/10] Add missing ga4gh import --- tests/utils/test_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 4fb2a8d96..7ec3529b1 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -2,12 +2,12 @@ from typing import Dict, List +import ga4gh import hail as hl import pytest from gnomad.utils.annotations import ( VRS_CHROM_IDS, - add_gks_va, add_gks_vrs, annotate_downsamplings, fill_missing_key_combinations, From 577c7622f7247e71bf3f77b7acff0515ab38fd28 Mon Sep 17 00:00:00 2001 From: Mike Wilson Date: Tue, 16 Sep 2025 15:17:15 -0400 Subject: [PATCH 10/10] Fix ga4gh core and vrs imports --- tests/utils/test_annotations.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/utils/test_annotations.py b/tests/utils/test_annotations.py index 7ec3529b1..5925fd520 100644 --- a/tests/utils/test_annotations.py +++ b/tests/utils/test_annotations.py @@ -2,7 +2,8 @@ from typing import Dict, List -import ga4gh +import ga4gh.core as ga4gh_core +import ga4gh.vrs as ga4gh_vrs import hail as hl import pytest @@ -1097,17 +1098,17 @@ class TestVRSFunctions: def test_vrs_identifier_generation(self): """Test that GA4GH identifiers are generated correctly.""" # Test that we can import and access VRS modules - assert hasattr(ga4gh.core, "__version__") - assert hasattr(ga4gh.vrs, "models") - assert hasattr(ga4gh.vrs.models, "SequenceLocation") + assert hasattr(ga4gh_core, "__version__") + assert hasattr(ga4gh_vrs, "models") + assert hasattr(ga4gh_vrs.models, "SequenceLocation") # Test that VRS 2.0.1+ API is available assert hasattr( - ga4gh.core, "ga4gh_identify" + ga4gh_core, "ga4gh_identify" ), "VRS 2.0.1+ ga4gh_identify function not found" # Test that we can create a VRS object using the 2.0.1+ API - seq_loc = ga4gh.vrs.models.SequenceLocation( + seq_loc = ga4gh_vrs.models.SequenceLocation( sequenceReference="ga4gh:SQ.test", start=1, end=2, @@ -1189,7 +1190,7 @@ def test_add_gks_vrs_grch37_chromosome_mapping(self): def test_vrs_version_compatibility(self): """Test that VRS 2.0.1+ is properly installed.""" # Test that VRS 2.0.1+ API is available - assert hasattr(ga4gh.core, "ga4gh_identify"), "VRS 2.0.1+ is required" + assert hasattr(ga4gh_core, "ga4gh_identify"), "VRS 2.0.1+ is required" # The function should work with VRS 2.0.1+ assert callable(add_gks_vrs)