diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 9930c61e34b15..b4b20553ec460 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -480,7 +480,19 @@ class GroupByCythonAgg:
     param_names = ["dtype", "method"]
     params = [
         ["float64"],
-        ["sum", "prod", "min", "max", "mean", "median", "var", "first", "last"],
+        [
+            "sum",
+            "prod",
+            "min",
+            "max",
+            "mean",
+            "median",
+            "var",
+            "first",
+            "last",
+            "any",
+            "all",
+        ],
     ]
 
     def setup(self, dtype, method):
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 17cb0a7836fbe..f132cdf6bbf6e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -217,6 +217,9 @@ Other enhancements
 - :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. ``pd.RangeIndex(range(3))`` (:issue:`12067`)
 - :meth:`round` being enabled for the nullable integer and floating dtypes (:issue:`38844`)
 - :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
+- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
+- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
+-
 
 .. ---------------------------------------------------------------------------
 
@@ -787,6 +790,8 @@ Groupby/resample/rolling
 - Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`)
 - Bug in aggregation functions for :class:`DataFrame` not respecting ``numeric_only`` argument when ``level`` keyword was given (:issue:`40660`)
 - Bug in :class:`core.window.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`)
+- Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising ``ValueError`` when using with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`)
+
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 48ee01c809efd..29e74fe4c0d8f 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -388,23 +388,26 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_any_all(uint8_t[::1] out,
-                  const uint8_t[::1] values,
+def group_any_all(int8_t[::1] out,
+                  const int8_t[::1] values,
                   const intp_t[:] labels,
                   const uint8_t[::1] mask,
                   str val_test,
-                  bint skipna) -> None:
+                  bint skipna,
+                  bint nullable) -> None:
     """
-    Aggregated boolean values to show truthfulness of group elements.
+    Aggregated boolean values to show truthfulness of group elements. If the
+    input is a nullable type (nullable=True), the result will be computed
+    using Kleene logic.
 
     Parameters
     ----------
-    out : np.ndarray[np.uint8]
+    out : np.ndarray[np.int8]
         Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its
         ordering matching up to the corresponding record in `values`
-    values : np.ndarray[np.uint8]
+    values : np.ndarray[np.int8]
         Containing the truth value of each element.
     mask : np.ndarray[np.uint8]
         Indicating whether a value is na or not.
@@ -412,16 +415,20 @@ def group_any_all(uint8_t[::1] out,
         String object dictating whether to use any or all truth testing
     skipna : bool
         Flag to ignore nan values during truth testing
+    nullable : bool
+        Whether or not the input is a nullable type. If True, the
+        result will be computed using Kleene logic
 
     Notes
     -----
     This method modifies the `out` parameter rather than returning an object.
-    The returned values will either be 0 or 1 (False or True, respectively).
+    The returned values will either be 0, 1 (False or True, respectively), or
+    -1 to signify a masked position in the case of a nullable input.
     """
     cdef:
         Py_ssize_t i, N = len(labels)
         intp_t lab
-        uint8_t flag_val
+        int8_t flag_val
 
     if val_test == 'all':
         # Because the 'all' value of an empty iterable in Python is True we can
@@ -444,6 +451,16 @@ def group_any_all(uint8_t[::1] out,
             if lab < 0 or (skipna and mask[i]):
                 continue
 
+            if nullable and mask[i]:
+                # Set the position as masked if `out[lab] != flag_val`, which
+                # would indicate True/False has not yet been seen for any/all,
+                # so by Kleene logic the result is currently unknown
+                if out[lab] != flag_val:
+                    out[lab] = -1
+                continue
+
+            # If True and 'any' or False and 'all', the result is
+            # already determined
             if values[i] == flag_val:
                 out[lab] = flag_val
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 34de67092de9f..43db889618db6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -77,6 +77,8 @@ class providing the base-class of operations.
 from pandas.core import nanops
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import (
+    BaseMaskedArray,
+    BooleanArray,
     Categorical,
     ExtensionArray,
 )
@@ -1413,24 +1415,34 @@ def _bool_agg(self, val_test, skipna):
         Shared func to call any / all Cython GroupBy implementations.
         """
 
-        def objs_to_bool(vals: np.ndarray) -> tuple[np.ndarray, type]:
+        def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
             if is_object_dtype(vals):
                 vals = np.array([bool(x) for x in vals])
+            elif isinstance(vals, BaseMaskedArray):
+                vals = vals._data.astype(bool, copy=False)
             else:
                 vals = vals.astype(bool)
 
-            return vals.view(np.uint8), bool
+            return vals.view(np.int8), bool
 
-        def result_to_bool(result: np.ndarray, inference: type) -> np.ndarray:
-            return result.astype(inference, copy=False)
+        def result_to_bool(
+            result: np.ndarray,
+            inference: type,
+            nullable: bool = False,
+        ) -> ArrayLike:
+            if nullable:
+                return BooleanArray(result.astype(bool, copy=False), result == -1)
+            else:
+                return result.astype(inference, copy=False)
 
         return self._get_cythonized_result(
             "group_any_all",
             aggregate=True,
             numeric_only=False,
-            cython_dtype=np.dtype(np.uint8),
+            cython_dtype=np.dtype(np.int8),
             needs_values=True,
             needs_mask=True,
+            needs_nullable=True,
             pre_processing=objs_to_bool,
             post_processing=result_to_bool,
             val_test=val_test,
@@ -2613,6 +2625,7 @@ def _get_cythonized_result(
         needs_counts: bool = False,
         needs_values: bool = False,
         needs_2d: bool = False,
+        needs_nullable: bool = False,
         min_count: int | None = None,
         needs_mask: bool = False,
         needs_ngroups: bool = False,
@@ -2649,6 +2662,9 @@ def _get_cythonized_result(
             signature
         needs_ngroups : bool, default False
             Whether number of groups is part of the Cython call signature
+        needs_nullable : bool, default False
+            Whether a bool specifying if the input is nullable is part
+            of the Cython call signature
         result_is_index : bool, default False
             Whether the result of the Cython operation is an index of
             values to be retrieved, instead of the actual values themselves
@@ -2664,7 +2680,8 @@ def _get_cythonized_result(
             Function to be applied to result of Cython function. Should accept
             an array of values as the first argument and type inferences as its
             second argument, i.e. the signature should be
-            (ndarray, Type).
+            (ndarray, Type). If `needs_nullable=True`, a third argument should be
+            `nullable`, to allow for processing specific to nullable values.
         **kwargs : dict
             Extra arguments to be passed back to Cython funcs
 
@@ -2739,6 +2756,12 @@ def _get_cythonized_result(
             if needs_ngroups:
                 func = partial(func, ngroups)
 
+            if needs_nullable:
+                is_nullable = isinstance(values, BaseMaskedArray)
+                func = partial(func, nullable=is_nullable)
+                if post_processing:
+                    post_processing = partial(post_processing, nullable=is_nullable)
+
             func(**kwargs)  # Call func to modify indexer values in place
 
             if needs_2d:
diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py
index 4123fb95002dd..d4f80aa0e51d4 100644
--- a/pandas/tests/groupby/test_any_all.py
+++ b/pandas/tests/groupby/test_any_all.py
@@ -3,9 +3,11 @@
 import numpy as np
 import pytest
 
+import pandas as pd
 from pandas import (
     DataFrame,
     Index,
+    Series,
     isna,
 )
 import pandas._testing as tm
@@ -68,3 +70,85 @@ def test_bool_aggs_dup_column_labels(bool_agg_func):
 
     expected = df
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize(
+    "data",
+    [
+        [False, False, False],
+        [True, True, True],
+        [pd.NA, pd.NA, pd.NA],
+        [False, pd.NA, False],
+        [True, pd.NA, True],
+        [True, pd.NA, False],
+    ],
+)
+def test_masked_kleene_logic(bool_agg_func, skipna, data):
+    # GH#37506
+    ser = Series(data, dtype="boolean")
+
+    # The result should match aggregating on the whole series. Correctness
+    # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic
+    expected_data = getattr(ser, bool_agg_func)(skipna=skipna)
+    expected = Series(expected_data, dtype="boolean")
+
+    result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "dtype1,dtype2,exp_col1,exp_col2",
+    [
+        (
+            "float",
+            "Float64",
+            np.array([True], dtype=bool),
+            pd.array([pd.NA], dtype="boolean"),
+        ),
+        (
+            "Int64",
+            "float",
+            pd.array([pd.NA], dtype="boolean"),
+            np.array([True], dtype=bool),
+        ),
+        (
+            "Int64",
+            "Int64",
+            pd.array([pd.NA], dtype="boolean"),
+            pd.array([pd.NA], dtype="boolean"),
+        ),
+        (
+            "Float64",
+            "boolean",
+            pd.array([pd.NA], dtype="boolean"),
+            pd.array([pd.NA], dtype="boolean"),
+        ),
+    ],
+)
+def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2):
+    # GH#37506
+    data = [1.0, np.nan]
+    df = DataFrame(
+        {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)}
+    )
+    result = df.groupby([1, 1]).agg("all", skipna=False)
+
+    expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=[1])
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
+@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
+@pytest.mark.parametrize("skipna", [True, False])
+def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series):
+    # GH#40585
+    obj = frame_or_series([pd.NA, 1], dtype=dtype)
+    expected_res = True
+    if not skipna and bool_agg_func == "all":
+        expected_res = pd.NA
+    expected = frame_or_series([expected_res], index=[1], dtype="boolean")
+
+    result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna)
+    tm.assert_equal(result, expected)
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 6e0422a527639..906f05fe0f348 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -941,31 +941,45 @@ def test_all_any_params(self):
         with pytest.raises(NotImplementedError, match=msg):
             s.all(bool_only=True)
 
-    def test_all_any_boolean(self):
-        # Check skipna, with boolean type
-        s1 = Series([pd.NA, True], dtype="boolean")
-        s2 = Series([pd.NA, False], dtype="boolean")
-        assert s1.all(skipna=False) is pd.NA  # NA && True => NA
-        assert s1.all(skipna=True)
-        assert s2.any(skipna=False) is pd.NA  # NA || False => NA
-        assert not s2.any(skipna=True)
+    @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
+    @pytest.mark.parametrize("skipna", [True, False])
+    @pytest.mark.parametrize(
+        # expected_data indexed as [[skipna=False/any, skipna=False/all],
+        #                           [skipna=True/any, skipna=True/all]]
+        "data,expected_data",
+        [
+            ([False, False, False], [[False, False], [False, False]]),
+            ([True, True, True], [[True, True], [True, True]]),
+            ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
+            ([False, pd.NA, False], [[pd.NA, False], [False, False]]),
+            ([True, pd.NA, True], [[True, pd.NA], [True, True]]),
+            ([True, pd.NA, False], [[True, False], [True, False]]),
+        ],
+    )
+    def test_any_all_boolean_kleene_logic(
+        self, bool_agg_func, skipna, data, expected_data
+    ):
+        ser = Series(data, dtype="boolean")
+        expected = expected_data[skipna][bool_agg_func == "all"]
 
-        # GH-33253: all True / all False values buggy with skipna=False
-        s3 = Series([True, True], dtype="boolean")
-        s4 = Series([False, False], dtype="boolean")
-        assert s3.all(skipna=False)
-        assert not s4.any(skipna=False)
+        result = getattr(ser, bool_agg_func)(skipna=skipna)
+        assert (result is pd.NA and expected is pd.NA) or result == expected
 
-        # Check level TODO(GH-33449) result should also be boolean
-        s = Series(
+    @pytest.mark.parametrize(
+        "bool_agg_func,expected",
+        [("all", [False, True, False]), ("any", [False, True, True])],
+    )
+    def test_any_all_boolean_level(self, bool_agg_func, expected):
+        # GH#33449
+        ser = Series(
             [False, False, True, True, False, True],
             index=[0, 0, 1, 1, 2, 2],
             dtype="boolean",
         )
         with tm.assert_produces_warning(FutureWarning):
-            tm.assert_series_equal(s.all(level=0), Series([False, True, False]))
-        with tm.assert_produces_warning(FutureWarning):
-            tm.assert_series_equal(s.any(level=0), Series([False, True, True]))
+            result = getattr(ser, bool_agg_func)(level=0)
+        expected = Series(expected, dtype="boolean")
+        tm.assert_series_equal(result, expected)
 
     def test_any_axis1_bool_only(self):
         # GH#32432