diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py
index f34e5daa063..4f655c6a95c 100644
--- a/torchvision/prototype/transforms/_geometry.py
+++ b/torchvision/prototype/transforms/_geometry.py
@@ -7,6 +7,10 @@ from torchvision.prototype.transforms import Transform
 
+# I recommend deleting all of the old classes from this branch to assist code reviews.
+# I know that you are already working on this (#5323), but I would try to keep your feature branch as clean as possible.
+# The issue here is that these implementations are incompatible with the functional approach on the same branch, which can lead to confusion.
+
 class HorizontalFlip(Transform):
     NO_OP_FEATURE_TYPES = {Label}
diff --git a/torchvision/prototype/transforms/functional/_augment.py b/torchvision/prototype/transforms/functional/_augment.py
index 814c34e5b00..a0b975b998b 100644
--- a/torchvision/prototype/transforms/functional/_augment.py
+++ b/torchvision/prototype/transforms/functional/_augment.py
@@ -24,6 +24,7 @@ def mixup_one_hot_label(one_hot_label_batch: torch.Tensor, *, lam: float, inplac
 
 def cutmix_image(image: torch.Tensor, *, box: Tuple[int, int, int, int], inplace: bool = False) -> torch.Tensor:
+    # Shouldn't we be using `image_batch`, similar to `mixup_image()`, to indicate that the input must be a batch?
     if not inplace:
         image = image.clone()
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index c8142742fa8..733da291aa0 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -3,7 +3,7 @@
 import torch
 from torchvision.prototype.features import BoundingBoxFormat
 from torchvision.transforms import (  # noqa: F401
-    functional as _F,
+    functional as _F,  # Shouldn't we be importing `functional_tensor`?
     InterpolationMode,
 )
 
@@ -11,7 +11,7 @@
 
 def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor:
-    return image.flip((-1,))
+    return image.flip((-1,))  # Why not use `_F.hflip()`?
 
 
 def horizontal_flip_bounding_box(bounding_box: torch.Tensor, *, image_size: Tuple[int, int]) -> torch.Tensor:
@@ -20,7 +20,7 @@ def horizontal_flip_bounding_box(bounding_box: torch.Tensor, *, image_size: Tupl
         old_format=BoundingBoxFormat.XYXY,
         new_format=BoundingBoxFormat.XYWH,
     ).unbind(-1)
-    x = image_size[1] - (x + w)
+    x = image_size[1] - (x + w)  # Why not avoid the format conversion and directly do `boxes[:, [0, 2]] = width - boxes[:, [2, 0]]`?
     return convert_bounding_box_format(
         torch.stack((x, y, w, h), dim=-1),
         old_format=BoundingBoxFormat.XYWH,
@@ -28,7 +28,7 @@
     )
 
 
-_resize_image = _F.resize
+_resize_image = _F.resize  # How about videos? The low-level transform supports them.
 
 
 def resize_image(
@@ -40,7 +40,7 @@
 ) -> torch.Tensor:
     new_height, new_width = size
     num_channels, old_height, old_width = image.shape[-3:]
-    batch_shape = image.shape[:-3]
+    batch_shape = image.shape[:-3]  # In some places you use `image` to denote batches and in others `image_batch`. Should we align the names?
     return _resize_image(
         image.reshape((-1, num_channels, old_height, old_width)),
         size=size,
@@ -71,7 +71,7 @@
 ) -> torch.Tensor:
     old_height, old_width = old_image_size
     new_height, new_width = new_image_size
-    return (
+    return (  # Shouldn't we be using a low-level kernel instead, similar to above?
         bounding_box.view(-1, 2, 2)
         .mul(torch.tensor([new_width / old_width, new_height / old_height]))
         .view(bounding_box.shape)
diff --git a/torchvision/prototype/transforms/functional/_meta_conversion.py b/torchvision/prototype/transforms/functional/_meta_conversion.py
index 484066a39ee..9df5899a38a 100644
--- a/torchvision/prototype/transforms/functional/_meta_conversion.py
+++ b/torchvision/prototype/transforms/functional/_meta_conversion.py
@@ -4,7 +4,7 @@
 
 def _xywh_to_xyxy(xywh: torch.Tensor) -> torch.Tensor:
-    xyxy = xywh.clone()
+    xyxy = xywh.clone()  # Nice. Do we have tests to ensure that no transform does an in-place modification of its input?
     xyxy[..., 2:] += xyxy[..., :2]
     return xyxy
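
To make the `horizontal_flip_bounding_box` suggestion above concrete, here is a rough sketch of the direct flip; the helper name is mine and not part of the PR, and it assumes XYXY boxes of shape (N, 4) with a known image width:

```python
import torch

def hflip_boxes_xyxy(boxes: torch.Tensor, image_width: int) -> torch.Tensor:
    # Hypothetical helper, not part of the PR. Mirroring swaps the x
    # coordinates: new x1 = width - old x2 and new x2 = width - old x1,
    # which is exactly what the fancy-indexing one-liner does.
    boxes = boxes.clone()  # keep the kernel out-of-place
    boxes[:, [0, 2]] = image_width - boxes[:, [2, 0]]
    return boxes
```

This should give the same result as the XYXY → XYWH → XYXY round trip (new x = width - (x + w) = width - x2, and adding w back yields width - x1), while avoiding two format conversions, at the cost of hard-coding the XYXY assumption into the kernel.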
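As for the in-place question on `_xywh_to_xyxy`, a minimal pytest-style sketch of the kind of guard I have in mind, assuming `horizontal_flip_image` is re-exported from the prototype functional namespace (ideally this would be parametrized over all kernels):

```python
import torch
from torchvision.prototype.transforms import functional as F

def test_horizontal_flip_image_does_not_mutate_input():
    image = torch.rand(3, 32, 32)
    original = image.clone()
    F.horizontal_flip_image(image)
    # The input must be bit-identical after the call.
    assert torch.equal(image, original)
```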