fixes

eclipse0922 · eclipse0922 · commit 1bf0850d2089 · 2025-09-25T13:43:14.000+09:00
Signed-off-by: sewon.jeon <sewon.jeon@connecteve.com>
diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py
@@ -753,7 +753,14 @@ def __call__(self, img: Sequence[NdarrayOrTensor] | NdarrayOrTensor) -> NdarrayO
 
 class GenerateHeatmap(Transform):
     """
-    Generate per-landmark gaussian response maps for 2D or 3D coordinates.
+    Generate per-landmark Gaussian heatmaps for 2D or 3D coordinates.
+
+    Notes:
+        - Coordinates are interpreted in voxel units and expected in (Y, X) for 2D or (Z, Y, X) for 3D.
+        - Output shape:
+            - Non-batched points (N, D): (N, H, W[, D])
+            - Batched points (B, N, D): (B, N, H, W[, D])
+        - Each channel corresponds to one landmark.
 
     Args:
         sigma: gaussian standard deviation. A single value is broadcast across all spatial dimensions.
@@ -829,11 +836,13 @@ def __call__(self, points: NdarrayOrTensor, spatial_shape: Sequence[int] | None
                     continue
                 region = heatmap[b_idx, idx][window_slices]
                 gaussian = self._evaluate_gaussian(coord_shifts, sigma)
-                torch.maximum(region, gaussian, out=region)
+                updated = torch.maximum(region, gaussian)
+                # write back
+                region.copy_(updated)
                 if self.normalize:
-                    max_val = heatmap[b_idx, idx].max()
-                    if max_val.item() > 0:
-                        heatmap[b_idx, idx] /= max_val
+                    peak = updated.max()
+                    if peak.item() > 0:
+                        heatmap[b_idx, idx] /= peak
 
         if not is_batched:
             heatmap = heatmap.squeeze(0)
@@ -851,7 +860,9 @@ def _resolve_spatial_shape(self, call_shape: Sequence[int] | None, spatial_dims:
             if len(shape_tuple) == 1:
                 shape_tuple = shape_tuple * spatial_dims  # type: ignore
             else:
-                raise ValueError("spatial_shape length must match spatial dimension of the landmarks.")
+                raise ValueError(
+                    "spatial_shape length must match the landmarks' spatial dims (or pass a single int to broadcast)."
+                )
         return tuple(int(s) for s in shape_tuple)
 
     def _resolve_sigma(self, spatial_dims: int) -> tuple[float, ...]:
@@ -879,7 +890,7 @@ def _make_window(
             if start >= stop:
                 return None, ()
             slices.append(slice(start, stop))
-            coord_shifts.append(torch.arange(start, stop, device=device, dtype=self.torch_dtype) - float(c))
+            coord_shifts.append(torch.arange(start, stop, device=device, dtype=torch.float32) - float(c))
         return tuple(slices), tuple(coord_shifts)
 
     def _evaluate_gaussian(self, coord_shifts: tuple[torch.Tensor, ...], sigma: tuple[float, ...]) -> torch.Tensor:
@@ -897,13 +908,15 @@ def _evaluate_gaussian(self, coord_shifts: tuple[torch.Tensor, ...], sigma: tupl
         shape = tuple(len(axis) for axis in coord_shifts)
         if 0 in shape:
             return torch.zeros(shape, dtype=self.torch_dtype, device=device)
-        exponent = torch.zeros(shape, dtype=self.torch_dtype, device=device)
+        exponent = torch.zeros(shape, dtype=torch.float32, device=device)
         for dim, (shift, sig) in enumerate(zip(coord_shifts, sigma)):
-            scaled = (shift / float(sig)) ** 2
+            shift32 = shift.to(torch.float32)
+            scaled = (shift32 / float(sig)) ** 2
             reshape_shape = [1] * len(coord_shifts)
             reshape_shape[dim] = shift.numel()
             exponent += scaled.reshape(reshape_shape)
-        return torch.exp(-0.5 * exponent)
+        gauss = torch.exp(-0.5 * exponent)
+        return gauss.to(dtype=self.torch_dtype)
 
 
 class ProbNMS(Transform):
diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py
@@ -517,6 +517,13 @@ class GenerateHeatmapd(MapTransform):
     """
     Dictionary-based wrapper of :py:class:`monai.transforms.GenerateHeatmap`.
     Converts landmark coordinates into gaussian heatmaps and optionally copies metadata from a reference image.
+
+    Notes:
+        - Default heatmap_keys are generated as "{key}_heatmap" for each input key
+        - Shape inference precedence: static spatial_shape > ref_image
+        - Output shapes:
+            - Non-batched points (N, D): (N, H, W[, D])
+            - Batched points (B, N, D): (B, N, H, W[, D])
     """
 
     backend = GenerateHeatmap.backend
@@ -538,7 +545,7 @@ def __init__(
         spatial_shape: Sequence[int] | Sequence[Sequence[int]] | None = None,
         truncated: float = 4.0,
         normalize: bool = True,
-        dtype: np.dtype | type = np.float32,
+        dtype: np.dtype | torch.dtype | type = np.float32,
         allow_missing_keys: bool = False,
     ) -> None:
         super().__init__(keys, allow_missing_keys)
@@ -567,6 +574,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> dict[Hashable, Any]:
                 )
                 # Copy metadata if reference is MetaTensor
                 if isinstance(reference, MetaTensor) and isinstance(heatmap, MetaTensor):
+                    heatmap.affine = reference.affine
                     self._update_spatial_metadata(heatmap, reference)
             d[out_key] = heatmap
         return d
@@ -640,18 +648,8 @@ def _shape_from_reference(self, reference: Any, spatial_dims: int) -> tuple[int,
 
     def _update_spatial_metadata(self, heatmap: MetaTensor, reference: MetaTensor) -> None:
         """Update spatial metadata of heatmap based on its dimensions."""
-        # Determine if batched based on reference's batch dimension
-        ref_spatial_shape = reference.meta.get("spatial_shape", [])
-        ref_is_batched = len(reference.shape) > len(ref_spatial_shape) + 1
-
-        if heatmap.ndim == 5:  # 3D batched: (B, C, H, W, D)
-            spatial_shape = heatmap.shape[2:]
-        elif heatmap.ndim == 4:  # 2D batched (B, C, H, W) or 3D non-batched (C, H, W, D)
-            # Disambiguate: 2D batched vs 3D non-batched
-            spatial_shape = heatmap.shape[2:] if ref_is_batched else heatmap.shape[1:]
-        else:  # 2D non-batched: (C, H, W)
-            spatial_shape = heatmap.shape[1:]
-
+        # trailing dims after channel are spatial regardless of batch presence
+        spatial_shape = heatmap.shape[-(reference.ndim - 1) :]
         heatmap.meta["spatial_shape"] = tuple(int(v) for v in spatial_shape)
 
 
diff --git a/tests/transforms/test_generate_heatmapd.py b/tests/transforms/test_generate_heatmapd.py
@@ -53,7 +53,7 @@
             f"dict_static_shape_{len(shape)}d",
             np.array([[1.0] * len(shape)], dtype=np.float32),
             {"spatial_shape": shape},
-            (1,) + shape,
+            (1, *shape),
             np.float32,
         ]
     )
@@ -165,7 +165,8 @@ def test_dict_batched_with_ref(self, _, points, params, expected_shape, _expecte
         assert_allclose(heatmap.affine, image.affine, type_test=False)
 
         # Check max values
-        max_vals = heatmap.max(dim=2)[0].max(dim=2)[0].max(dim=2)[0]
+        hm2 = heatmap.reshape(heatmap.shape[0], heatmap.shape[1], -1)
+        max_vals = hm2.max(dim=2)[0]
         np.testing.assert_allclose(
             max_vals.cpu().numpy(), np.ones((expected_shape[0], expected_shape[1])), rtol=1e-5, atol=1e-5
         )

Original file line number	Diff line number	Diff line change
`@@ -53,7 +53,7 @@`
`53`	`53`	`f"dict_static_shape_{len(shape)}d",`
`54`	`54`	`np.array([[1.0] * len(shape)], dtype=np.float32),`
`55`	`55`	`{"spatial_shape": shape},`
`56`		`- (1,) + shape,`
	`56`	`+ (1, *shape),`
`57`	`57`	`np.float32,`
`58`	`58`	`]`
`59`	`59`	`)`
`@@ -165,7 +165,8 @@ def test_dict_batched_with_ref(self, _, points, params, expected_shape, _expecte`
`165`	`165`	`assert_allclose(heatmap.affine, image.affine, type_test=False)`
`166`	`166`
`167`	`167`	`# Check max values`
`168`		`- max_vals = heatmap.max(dim=2)[0].max(dim=2)[0].max(dim=2)[0]`
	`168`	`+ hm2 = heatmap.reshape(heatmap.shape[0], heatmap.shape[1], -1)`
	`169`	`+ max_vals = hm2.max(dim=2)[0]`
`169`	`170`	`np.testing.assert_allclose(`
`170`	`171`	`max_vals.cpu().numpy(), np.ones((expected_shape[0], expected_shape[1])), rtol=1e-5, atol=1e-5`
`171`	`172`	`)`