diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 1619b009e9364..10a39497c8ed9 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image"
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""

-ENV FULL_REFRESH_DATE 20241002
+ENV FULL_REFRESH_DATE 20241007

 ENV DEBIAN_FRONTEND noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN true
@@ -91,10 +91,10 @@ RUN mkdir -p /usr/local/pypy/pypy3.9 && \
     ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3.9 && \
     ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
-RUN pypy3 -m pip install 'numpy==1.26.4' 'six==1.16.0' 'pandas==2.2.3' scipy coverage matplotlib lxml
+RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matplotlib lxml

-ARG BASIC_PIP_PKGS="numpy==1.26.4 pyarrow>=15.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.56.4 graphviz==0.20.3"
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 937753b50bb13..b89755d9c18a5 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -699,7 +699,7 @@ class LinearSVC(
     >>> model_path = temp_path + "/svm_model"
     >>> model.save(model_path)
     >>> model2 = LinearSVCModel.load(model_path)
-    >>> model.coefficients[0] == model2.coefficients[0]
+    >>> bool(model.coefficients[0] == model2.coefficients[0])
     True
     >>> model.intercept == model2.intercept
     True
@@ -1210,7 +1210,7 @@ class LogisticRegression(
     >>> model_path = temp_path + "/lr_model"
     >>> blorModel.save(model_path)
     >>> model2 = LogisticRegressionModel.load(model_path)
-    >>> blorModel.coefficients[0] == model2.coefficients[0]
+    >>> bool(blorModel.coefficients[0] == model2.coefficients[0])
     True
     >>> blorModel.intercept == model2.intercept
     True
@@ -2038,9 +2038,9 @@ class RandomForestClassifier(
     >>> result = model.transform(test0).head()
     >>> result.prediction
     0.0
-    >>> numpy.argmax(result.probability)
+    >>> int(numpy.argmax(result.probability))
     0
-    >>> numpy.argmax(result.newRawPrediction)
+    >>> int(numpy.argmax(result.newRawPrediction))
     0
     >>> result.leafId
     DenseVector([0.0, 0.0, 0.0])
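Note on the doctest changes in this and the following files: NumPy 2.0 implements NEP 51, which changed how numpy scalars print. Operations that return a numpy scalar (vector indexing, comparisons, argmax, and so on) now display as np.float64(1.0), np.int64(0), or np.True_ rather than 1.0, 0, or True, so the old expected outputs fail under NumPy 2, while wrapped expected outputs would fail under NumPy 1.x. Casting to the Python built-in keeps a single expected output valid on both majors. A minimal sketch of the failure mode, assuming NumPy >= 2.0 is installed:

    >>> import numpy as np
    >>> coeffs = np.array([1.0, 2.0])
    >>> coeffs[0]                                   # NumPy 1.x printed: 1.0
    np.float64(1.0)
    >>> coeffs[0] == 1.0                            # NumPy 1.x printed: True
    np.True_
    >>> float(coeffs[0]), bool(coeffs[0] == 1.0)    # stable on both majors
    (1.0, True)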
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index d08e241b41d23..d7cc27e274279 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -266,7 +266,7 @@ class LinearRegression(
     True
     >>> abs(model.transform(test0).head().newPrediction - (-1.0)) < 0.001
     True
-    >>> abs(model.coefficients[0] - 1.0) < 0.001
+    >>> bool(abs(model.coefficients[0] - 1.0) < 0.001)
     True
     >>> abs(model.intercept - 0.0) < 0.001
     True
@@ -283,11 +283,11 @@ class LinearRegression(
     >>> model_path = temp_path + "/lr_model"
     >>> model.save(model_path)
     >>> model2 = LinearRegressionModel.load(model_path)
-    >>> model.coefficients[0] == model2.coefficients[0]
+    >>> bool(model.coefficients[0] == model2.coefficients[0])
     True
-    >>> model.intercept == model2.intercept
+    >>> bool(model.intercept == model2.intercept)
     True
-    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    >>> bool(model.transform(test0).take(1) == model2.transform(test0).take(1))
     True
     >>> model.numFeatures
     1
@@ -2542,7 +2542,7 @@ class GeneralizedLinearRegression(
     >>> model2 = GeneralizedLinearRegressionModel.load(model_path)
     >>> model.intercept == model2.intercept
     True
-    >>> model.coefficients[0] == model2.coefficients[0]
+    >>> bool(model.coefficients[0] == model2.coefficients[0])
     True
     >>> model.transform(df).take(1) == model2.transform(df).take(1)
     True
diff --git a/python/pyspark/ml/tests/test_functions.py b/python/pyspark/ml/tests/test_functions.py
index 7df0a26394140..e67e46ded67bd 100644
--- a/python/pyspark/ml/tests/test_functions.py
+++ b/python/pyspark/ml/tests/test_functions.py
@@ -18,6 +18,7 @@
 import numpy as np

+from pyspark.loose_version import LooseVersion
 from pyspark.ml.functions import predict_batch_udf
 from pyspark.sql.functions import array, struct, col
 from pyspark.sql.types import ArrayType, DoubleType, IntegerType, StructType, StructField, FloatType
@@ -193,6 +194,10 @@ def predict(inputs):
         batch_sizes = preds["preds"].to_numpy()
         self.assertTrue(all(batch_sizes <= batch_size))

+    # TODO(SPARK-49793): enable the test below
+    @unittest.skipIf(
+        LooseVersion(np.__version__) >= LooseVersion("2"), "Caching does not work with numpy 2"
+    )
     def test_caching(self):
         def make_predict_fn():
             # emulate loading a model, this should only be invoked once (per worker process)
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index e8713d81c4d62..888beff663523 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -706,7 +706,7 @@ class CrossValidator(
     >>> cvModel = cv.fit(dataset)
     >>> cvModel.getNumFolds()
     3
-    >>> cvModel.avgMetrics[0]
+    >>> float(cvModel.avgMetrics[0])
     0.5
     >>> path = tempfile.mkdtemp()
     >>> model_path = path + "/model"
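The test_functions.py hunk is different in kind: it does not fix doctest output but skips test_caching entirely, since (per the TODO above) the predict_batch_udf model cache misbehaves under NumPy 2, tracked separately as SPARK-49793. The guard compares versions with PySpark's vendored LooseVersion; roughly, assuming NumPy >= 2.0 means the test is skipped:

    >>> from pyspark.loose_version import LooseVersion
    >>> LooseVersion("1.26.4") >= LooseVersion("2")   # numpy 1.x: test runs
    False
    >>> LooseVersion("2.1.0") >= LooseVersion("2")    # numpy 2.x: test skipped
    True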
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 1e1795d9fb3d4..bf8fd04dc2837 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -172,9 +172,9 @@ class LogisticRegressionModel(LinearClassificationModel):
     >>> path = tempfile.mkdtemp()
     >>> lrm.save(sc, path)
     >>> sameModel = LogisticRegressionModel.load(sc, path)
-    >>> sameModel.predict(numpy.array([0.0, 1.0]))
+    >>> int(sameModel.predict(numpy.array([0.0, 1.0])))
     1
-    >>> sameModel.predict(SparseVector(2, {0: 1.0}))
+    >>> int(sameModel.predict(SparseVector(2, {0: 1.0})))
     0
     >>> from shutil import rmtree
     >>> try:
@@ -555,7 +555,7 @@ class SVMModel(LinearClassificationModel):
     >>> svm.predict(sc.parallelize([[1.0]])).collect()
     [1]
     >>> svm.clearThreshold()
-    >>> svm.predict(numpy.array([1.0]))
+    >>> float(svm.predict(numpy.array([1.0])))
     1.44...

     >>> sparse_data = [
@@ -573,9 +573,9 @@ class SVMModel(LinearClassificationModel):
     >>> path = tempfile.mkdtemp()
     >>> svm.save(sc, path)
     >>> sameModel = SVMModel.load(sc, path)
-    >>> sameModel.predict(SparseVector(2, {1: 1.0}))
+    >>> int(sameModel.predict(SparseVector(2, {1: 1.0})))
     1
-    >>> sameModel.predict(SparseVector(2, {0: -1.0}))
+    >>> int(sameModel.predict(SparseVector(2, {0: -1.0})))
     0
     >>> from shutil import rmtree
     >>> try:
@@ -756,11 +756,11 @@ class NaiveBayesModel(Saveable, Loader["NaiveBayesModel"]):
     ...     LabeledPoint(1.0, [1.0, 0.0]),
     ... ]
     >>> model = NaiveBayes.train(sc.parallelize(data))
-    >>> model.predict(numpy.array([0.0, 1.0]))
+    >>> float(model.predict(numpy.array([0.0, 1.0])))
     0.0
-    >>> model.predict(numpy.array([1.0, 0.0]))
+    >>> float(model.predict(numpy.array([1.0, 0.0])))
     1.0
-    >>> model.predict(sc.parallelize([[1.0, 0.0]])).collect()
+    >>> list(map(float, model.predict(sc.parallelize([[1.0, 0.0]])).collect()))
     [1.0]
     >>> sparse_data = [
     ...     LabeledPoint(0.0, SparseVector(2, {1: 0.0})),
@@ -768,15 +768,18 @@ class NaiveBayesModel(Saveable, Loader["NaiveBayesModel"]):
     ...     LabeledPoint(1.0, SparseVector(2, {0: 1.0}))
     ... ]
     >>> model = NaiveBayes.train(sc.parallelize(sparse_data))
-    >>> model.predict(SparseVector(2, {1: 1.0}))
+    >>> float(model.predict(SparseVector(2, {1: 1.0})))
     0.0
-    >>> model.predict(SparseVector(2, {0: 1.0}))
+    >>> float(model.predict(SparseVector(2, {0: 1.0})))
     1.0
     >>> import os, tempfile
     >>> path = tempfile.mkdtemp()
     >>> model.save(sc, path)
     >>> sameModel = NaiveBayesModel.load(sc, path)
-    >>> sameModel.predict(SparseVector(2, {0: 1.0})) == model.predict(SparseVector(2, {0: 1.0}))
+    >>> bool((
+    ...     sameModel.predict(SparseVector(2, {0: 1.0})) ==
+    ...     model.predict(SparseVector(2, {0: 1.0}))
+    ... ))
     True
     >>> from shutil import rmtree
     >>> try:
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 24884f4853371..915a55595cb53 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -554,9 +554,9 @@ class PCA:
     ...     Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0])]
     >>> model = PCA(2).fit(sc.parallelize(data))
     >>> pcArray = model.transform(Vectors.sparse(5, [(1, 1.0), (3, 7.0)])).toArray()
-    >>> pcArray[0]
+    >>> float(pcArray[0])
     1.648...
-    >>> pcArray[1]
+    >>> float(pcArray[1])
     -4.013...
     """
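Two casting patterns recur in the MLlib doctests above. A bare predict() returns a numpy scalar, handled with float()/int() as elsewhere; predict() over an RDD returns a numpy scalar per element, and a collected list prints its elements' wrapped reprs, so the cast must be applied element-wise. Illustrated with plain numpy (no Spark needed), assuming NumPy >= 2.0:

    >>> import numpy as np
    >>> collected = [np.float64(1.0)]   # stand-in for rdd.collect() output
    >>> collected                       # NumPy 1.x printed: [1.0]
    [np.float64(1.0)]
    >>> list(map(float, collected))
    [1.0]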
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py
index 80bbd717071dc..dbe1048a64b36 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/random.py
@@ -134,9 +134,9 @@ def normalRDD(
         >>> stats = x.stats()
         >>> stats.count()
         1000
-        >>> abs(stats.mean() - 0.0) < 0.1
+        >>> bool(abs(stats.mean() - 0.0) < 0.1)
         True
-        >>> abs(stats.stdev() - 1.0) < 0.1
+        >>> bool(abs(stats.stdev() - 1.0) < 0.1)
         True
         """
         return callMLlibFunc("normalRDD", sc._jsc, size, numPartitions, seed)
@@ -186,10 +186,10 @@ def logNormalRDD(
         >>> stats = x.stats()
         >>> stats.count()
         1000
-        >>> abs(stats.mean() - expMean) < 0.5
+        >>> bool(abs(stats.mean() - expMean) < 0.5)
         True
         >>> from math import sqrt
-        >>> abs(stats.stdev() - expStd) < 0.5
+        >>> bool(abs(stats.stdev() - expStd) < 0.5)
         True
         """
         return callMLlibFunc(
@@ -238,7 +238,7 @@ def poissonRDD(
         >>> abs(stats.mean() - mean) < 0.5
         True
         >>> from math import sqrt
-        >>> abs(stats.stdev() - sqrt(mean)) < 0.5
+        >>> bool(abs(stats.stdev() - sqrt(mean)) < 0.5)
         True
         """
         return callMLlibFunc("poissonRDD", sc._jsc, float(mean), size, numPartitions, seed)
@@ -285,7 +285,7 @@ def exponentialRDD(
         >>> abs(stats.mean() - mean) < 0.5
         True
         >>> from math import sqrt
-        >>> abs(stats.stdev() - sqrt(mean)) < 0.5
+        >>> bool(abs(stats.stdev() - sqrt(mean)) < 0.5)
         True
         """
         return callMLlibFunc("exponentialRDD", sc._jsc, float(mean), size, numPartitions, seed)
@@ -336,9 +336,9 @@ def gammaRDD(
         >>> stats = x.stats()
         >>> stats.count()
         1000
-        >>> abs(stats.mean() - expMean) < 0.5
+        >>> bool(abs(stats.mean() - expMean) < 0.5)
         True
-        >>> abs(stats.stdev() - expStd) < 0.5
+        >>> bool(abs(stats.stdev() - expStd) < 0.5)
         True
         """
         return callMLlibFunc(
@@ -384,7 +384,7 @@ def uniformVectorRDD(
        >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
        >>> mat.shape
        (10, 10)
-       >>> mat.max() <= 1.0 and mat.min() >= 0.0
+       >>> bool(mat.max() <= 1.0 and mat.min() >= 0.0)
        True
        >>> RandomRDDs.uniformVectorRDD(sc, 10, 10, 4).getNumPartitions()
        4
@@ -430,9 +430,9 @@ def normalVectorRDD(
         >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1).collect())
         >>> mat.shape
         (100, 100)
-        >>> abs(mat.mean() - 0.0) < 0.1
+        >>> bool(abs(mat.mean() - 0.0) < 0.1)
         True
-        >>> abs(mat.std() - 1.0) < 0.1
+        >>> bool(abs(mat.std() - 1.0) < 0.1)
         True
         """
         return callMLlibFunc("normalVectorRDD", sc._jsc, numRows, numCols, numPartitions, seed)
@@ -488,9 +488,9 @@ def logNormalVectorRDD(
         >>> mat = np.matrix(m)
         >>> mat.shape
         (100, 100)
-        >>> abs(mat.mean() - expMean) < 0.1
+        >>> bool(abs(mat.mean() - expMean) < 0.1)
         True
-        >>> abs(mat.std() - expStd) < 0.1
+        >>> bool(abs(mat.std() - expStd) < 0.1)
         True
         """
         return callMLlibFunc(
@@ -545,13 +545,13 @@ def poissonVectorRDD(
         >>> import numpy as np
         >>> mean = 100.0
         >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1)
-        >>> mat = np.mat(rdd.collect())
+        >>> mat = np.asmatrix(rdd.collect())
         >>> mat.shape
         (100, 100)
-        >>> abs(mat.mean() - mean) < 0.5
+        >>> bool(abs(mat.mean() - mean) < 0.5)
         True
         >>> from math import sqrt
-        >>> abs(mat.std() - sqrt(mean)) < 0.5
+        >>> bool(abs(mat.std() - sqrt(mean)) < 0.5)
         True
         """
         return callMLlibFunc(
@@ -599,13 +599,13 @@ def exponentialVectorRDD(
         >>> import numpy as np
         >>> mean = 0.5
         >>> rdd = RandomRDDs.exponentialVectorRDD(sc, mean, 100, 100, seed=1)
-        >>> mat = np.mat(rdd.collect())
+        >>> mat = np.asmatrix(rdd.collect())
         >>> mat.shape
         (100, 100)
-        >>> abs(mat.mean() - mean) < 0.5
+        >>> bool(abs(mat.mean() - mean) < 0.5)
         True
         >>> from math import sqrt
-        >>> abs(mat.std() - sqrt(mean)) < 0.5
+        >>> bool(abs(mat.std() - sqrt(mean)) < 0.5)
         True
         """
         return callMLlibFunc(
@@ -662,9 +662,9 @@ def gammaVectorRDD(
         >>> mat = np.matrix(RandomRDDs.gammaVectorRDD(sc, shape, scale, 100, 100, seed=1).collect())
         >>> mat.shape
         (100, 100)
-        >>> abs(mat.mean() - expMean) < 0.1
+        >>> bool(abs(mat.mean() - expMean) < 0.1)
         True
-        >>> abs(mat.std() - expStd) < 0.1
+        >>> bool(abs(mat.std() - expStd) < 0.1)
         True
         """
         return callMLlibFunc(
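Besides the bool() wrappers, the poissonVectorRDD and exponentialVectorRDD doctests above switch from np.mat to np.asmatrix: np.mat was an alias of np.asmatrix and was removed from the main namespace in NumPy 2.0, while np.asmatrix exists on both major versions. A quick sketch:

    >>> import numpy as np
    >>> mat = np.asmatrix([[1.0, 2.0], [3.0, 4.0]])  # np.mat(...) raises AttributeError on numpy 2
    >>> mat.shape
    (2, 2)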
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index f1003327912d0..87f05bc0979b8 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -144,9 +144,9 @@ class LinearRegressionModelBase(LinearModel):
     --------
     >>> from pyspark.mllib.linalg import SparseVector
     >>> lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1)
-    >>> abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6
+    >>> bool(abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6)
     True
-    >>> abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) < 1e-6
+    >>> bool(abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) < 1e-6)
     True
     """

@@ -190,23 +190,23 @@ class LinearRegressionModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
     ...     initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(lrm.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    >>> bool(abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5)
     True
     >>> import os, tempfile
     >>> path = tempfile.mkdtemp()
     >>> lrm.save(sc, path)
     >>> sameModel = LinearRegressionModel.load(sc, path)
-    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> from shutil import rmtree
     >>> try:
@@ -221,16 +221,16 @@ class LinearRegressionModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
     ...     initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
     ...     miniBatchFraction=1.0, initialWeights=np.array([1.0]), regParam=0.1, regType="l2",
     ...     intercept=True, validateData=True)
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     """

@@ -402,23 +402,23 @@ class LassoModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = LassoWithSGD.train(
     ...     sc.parallelize(data), iterations=10, initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(lrm.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    >>> bool(abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5)
     True
     >>> import os, tempfile
     >>> path = tempfile.mkdtemp()
     >>> lrm.save(sc, path)
     >>> sameModel = LassoModel.load(sc, path)
-    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> from shutil import rmtree
     >>> try:
@@ -433,16 +433,16 @@ class LassoModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
     ...     initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> lrm = LassoWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
     ...     regParam=0.01, miniBatchFraction=1.0, initialWeights=np.array([1.0]), intercept=True,
     ...     validateData=True)
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     """
@@ -580,23 +580,23 @@ class RidgeRegressionModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10,
     ...     initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(lrm.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
-    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    >>> bool(abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5)
     True
     >>> import os, tempfile
     >>> path = tempfile.mkdtemp()
     >>> lrm.save(sc, path)
     >>> sameModel = RidgeRegressionModel.load(sc, path)
-    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(np.array([1.0])) - 1) < 0.5)
     True
-    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> from shutil import rmtree
     >>> try:
@@ -611,16 +611,16 @@ class RidgeRegressionModel(LinearRegressionModelBase):
     ... ]
     >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
     ...     initialWeights=np.array([1.0]))
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     >>> lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
     ...     regParam=0.01, miniBatchFraction=1.0, initialWeights=np.array([1.0]), intercept=True,
     ...     validateData=True)
-    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    >>> bool(abs(lrm.predict(np.array([0.0])) - 0) < 0.5)
     True
-    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    >>> bool(abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5)
     True
     """
@@ -764,19 +764,19 @@ class IsotonicRegressionModel(Saveable, Loader["IsotonicRegressionModel"]):
     --------
     >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
    >>> irm = IsotonicRegression.train(sc.parallelize(data))
-    >>> irm.predict(3)
+    >>> float(irm.predict(3))
     2.0
-    >>> irm.predict(5)
+    >>> float(irm.predict(5))
     16.5
-    >>> irm.predict(sc.parallelize([3, 5])).collect()
+    >>> list(map(float, irm.predict(sc.parallelize([3, 5])).collect()))
     [2.0, 16.5]
     >>> import os, tempfile
     >>> path = tempfile.mkdtemp()
     >>> irm.save(sc, path)
     >>> sameModel = IsotonicRegressionModel.load(sc, path)
-    >>> sameModel.predict(3)
+    >>> float(sameModel.predict(3))
     2.0
-    >>> sameModel.predict(5)
+    >>> float(sameModel.predict(5))
     16.5
     >>> from shutil import rmtree
     >>> try:
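Why cast rather than update the expected outputs to np.True_ and friends? A doctest can match only one literal, and the wrapped reprs differ between NumPy majors, so expected outputs like np.True_ would break environments still on NumPy 1.x. Casting makes the printed value version-independent; a small demonstration that runs on either major:

    >>> import numpy as np
    >>> repr(np.bool_(True)) in ("True", "np.True_")   # differs by numpy major
    True
    >>> repr(bool(np.bool_(True)))                     # identical on both
    'True'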
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index 6e63cff1d37b9..55f15fd2eb1a2 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -2631,7 +2631,7 @@ def first_valid_index(self) -> Optional[Union[Scalar, Tuple[Scalar, ...]]]:
         500    5.0
         dtype: float64

-        >>> s.first_valid_index()
+        >>> int(s.first_valid_index())
         300

         Support for MultiIndex
@@ -2950,7 +2950,7 @@ def get(self, key: Any, default: Optional[Any] = None) -> Any:
         20  1  b
         20  2  b

-        >>> df.x.get(10)
+        >>> int(df.x.get(10))
         0

         >>> df.x.get(20)
@@ -3008,7 +3008,7 @@ def squeeze(self, axis: Optional[Axis] = None) -> Union[Scalar, "DataFrame", "Se
         0    2
         dtype: int64

-        >>> even_primes.squeeze()
+        >>> int(even_primes.squeeze())
         2

         Squeezing objects with more than one value in every axis does nothing:
@@ -3066,7 +3066,7 @@ def squeeze(self, axis: Optional[Axis] = None) -> Union[Scalar, "DataFrame", "Se

         Squeezing all axes will project directly into a scalar:

-        >>> df_1a.squeeze()
+        >>> int(df_1a.squeeze())
         3
         """
         if axis is not None:
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index b5bf65a4907b7..c93366a31e315 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -122,7 +122,7 @@ class AtIndexer(IndexerLike):

     Get value at specified row/column pair

-    >>> psdf.at[4, 'B']
+    >>> int(psdf.at[4, 'B'])
     2

     Get array if an index occurs multiple times
@@ -202,7 +202,7 @@ class iAtIndexer(IndexerLike):

     Get value at specified row/column pair

-    >>> df.iat[1, 2]
+    >>> int(df.iat[1, 2])
     1

     Get value within a series
@@ -214,7 +214,7 @@ class iAtIndexer(IndexerLike):
     30    3
     dtype: int64

-    >>> psser.iat[1]
+    >>> int(psser.iat[1])
     2
     """

@@ -853,7 +853,7 @@ class LocIndexer(LocIndexerLike):

     Single label for column.

-    >>> df.loc['cobra', 'shield']
+    >>> int(df.loc['cobra', 'shield'])
     2

     List of labels for row.
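The pandas-on-Spark changes here and in series.py below follow the same NEP 51 rationale: scalar access through .at, .iat, .loc, get, squeeze, pop, and asof returns a numpy scalar, whose repr is now wrapped. Plain pandas shows the same behavior and is the quickest way to see it (assuming a default int64 column and NumPy >= 2.0):

    >>> import pandas as pd
    >>> df = pd.DataFrame({"B": [2, 3]}, index=[4, 5])
    >>> df.at[4, "B"]          # NumPy 1.x printed: 2
    np.int64(2)
    >>> int(df.at[4, "B"])
    2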
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index ff941b692f95f..7e276860fbab1 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -4558,7 +4558,7 @@ def pop(self, item: Name) -> Union["Series", Scalar]:
         C    2
         dtype: int64

-        >>> s.pop('A')
+        >>> int(s.pop('A'))
         0

         >>> s
@@ -5821,7 +5821,7 @@ def asof(self, where: Union[Any, List]) -> Union[Scalar, "Series"]:

         A scalar `where`.

-        >>> s.asof(20)
+        >>> float(s.asof(20))
         2.0

         For a sequence `where`, a Series is returned. The first value is
@@ -5836,12 +5836,12 @@ def asof(self, where: Union[Any, List]) -> Union[Scalar, "Series"]:
         Missing values are not considered. The following is ``2.0``, not NaN,
         even though NaN is at the index location for ``30``.

-        >>> s.asof(30)
+        >>> float(s.asof(30))
         2.0

         >>> s = ps.Series([1, 2, np.nan, 4], index=[10, 30, 20, 40])
         >>> with ps.option_context("compute.eager_check", False):
-        ...     s.asof(20)
+        ...     float(s.asof(20))
         ...
         1.0
         """