@@ -641,60 +641,8 @@ def finalise(self, show_progress=False):
641
641
def create_index (self ):
642
642
"""Create an index to support efficient region queries."""
643
643
644
- root = zarr .open_group (store = self .path , mode = "r+" )
645
-
646
- contig = root ["variant_contig" ]
647
- pos = root ["variant_position" ]
648
- length = root ["variant_length" ]
649
-
650
- assert contig .cdata_shape == pos .cdata_shape
651
-
652
- index = []
653
-
654
- logger .info ("Creating region index" )
655
- for v_chunk in range (pos .cdata_shape [0 ]):
656
- c = contig .blocks [v_chunk ]
657
- p = pos .blocks [v_chunk ]
658
- e = p + length .blocks [v_chunk ] - 1
659
-
660
- # create a row for each contig in the chunk
661
- d = np .diff (c , append = - 1 )
662
- c_start_idx = 0
663
- for c_end_idx in np .nonzero (d )[0 ]:
664
- assert c [c_start_idx ] == c [c_end_idx ]
665
- index .append (
666
- (
667
- v_chunk , # chunk index
668
- c [c_start_idx ], # contig ID
669
- p [c_start_idx ], # start
670
- p [c_end_idx ], # end
671
- np .max (e [c_start_idx : c_end_idx + 1 ]), # max end
672
- c_end_idx - c_start_idx + 1 , # num records
673
- )
674
- )
675
- c_start_idx = c_end_idx + 1
676
-
677
- index = np .array (index , dtype = pos .dtype )
678
- kwargs = {}
679
- if not zarr_utils .zarr_v3 ():
680
- kwargs ["dimension_separator" ] = self .metadata .dimension_separator
681
- array = root .array (
682
- "region_index" ,
683
- data = index ,
684
- shape = index .shape ,
685
- chunks = index .shape ,
686
- dtype = index .dtype ,
687
- compressor = numcodecs .Blosc ("zstd" , clevel = 9 , shuffle = 0 ),
688
- fill_value = None ,
689
- ** kwargs ,
690
- )
691
- array .attrs ["_ARRAY_DIMENSIONS" ] = [
692
- "region_index_values" ,
693
- "region_index_fields" ,
694
- ]
695
-
696
- logger .info ("Consolidating Zarr metadata" )
697
- zarr .consolidate_metadata (self .path )
644
+ indexer = VcfZarrIndexer (self .path )
645
+ indexer .create_index ()
698
646
699
647
######################
700
648
# encode_all_partitions
0 commit comments