Skip to content

Commit 0123a5e

Browse files
drexler-sky authored and HyukjinKwon committed
[SPARK-50764][PYTHON] Refine the docstring of xpath related methods
### What changes were proposed in this pull request?
Refined the docstrings of the xpath-related methods.

### Why are the changes needed?
To improve docs and test coverage.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Doctests.

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #49422 from drexler-sky/docstring.

Authored-by: Evan Wu <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent f0fb8bf commit 0123a5e

File tree

1 file changed

+56
-16
lines changed

1 file changed

+56
-16
lines changed

python/pyspark/sql/functions/builtin.py

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11666,10 +11666,15 @@ def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1166611666

1166711667
Examples
1166811668
--------
11669+
>>> from pyspark.sql import functions as sf
1166911670
>>> df = spark.createDataFrame(
1167011671
... [('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>',)], ['x'])
11671-
>>> df.select(xpath(df.x, lit('a/b/text()')).alias('r')).collect()
11672-
[Row(r=['b1', 'b2', 'b3'])]
11672+
>>> df.select(sf.xpath(df.x, sf.lit('a/b/text()'))).show()
11673+
+--------------------+
11674+
|xpath(x, a/b/text())|
11675+
+--------------------+
11676+
|        [b1, b2, b3]|
11677+
+--------------------+
1167311678
"""
1167411679
return _invoke_function_over_columns("xpath", xml, path)
1167511680

@@ -11683,9 +11688,14 @@ def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1168311688

1168411689
Examples
1168511690
--------
11691+
>>> from pyspark.sql import functions as sf
1168611692
>>> df = spark.createDataFrame([('<a><b>1</b></a>',)], ['x'])
11687-
>>> df.select(xpath_boolean(df.x, lit('a/b')).alias('r')).collect()
11688-
[Row(r=True)]
11693+
>>> df.select(sf.xpath_boolean(df.x, sf.lit('a/b'))).show()
11694+
+---------------------+
11695+
|xpath_boolean(x, a/b)|
11696+
+---------------------+
11697+
|                 true|
11698+
+---------------------+
1168911699
"""
1169011700
return _invoke_function_over_columns("xpath_boolean", xml, path)
1169111701

@@ -11700,9 +11710,14 @@ def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1170011710

1170111711
Examples
1170211712
--------
11713+
>>> from pyspark.sql import functions as sf
1170311714
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
11704-
>>> df.select(xpath_double(df.x, lit('sum(a/b)')).alias('r')).collect()
11705-
[Row(r=3.0)]
11715+
>>> df.select(sf.xpath_double(df.x, sf.lit('sum(a/b)'))).show()
11716+
+-------------------------+
11717+
|xpath_double(x, sum(a/b))|
11718+
+-------------------------+
11719+
|                      3.0|
11720+
+-------------------------+
1170611721
"""
1170711722
return _invoke_function_over_columns("xpath_double", xml, path)
1170811723

@@ -11740,9 +11755,14 @@ def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1174011755

1174111756
Examples
1174211757
--------
11758+
>>> from pyspark.sql import functions as sf
1174311759
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
11744-
>>> df.select(xpath_float(df.x, lit('sum(a/b)')).alias('r')).collect()
11745-
[Row(r=3.0)]
11760+
>>> df.select(sf.xpath_float(df.x, sf.lit('sum(a/b)'))).show()
11761+
+------------------------+
11762+
|xpath_float(x, sum(a/b))|
11763+
+------------------------+
11764+
|                     3.0|
11765+
+------------------------+
1174611766
"""
1174711767
return _invoke_function_over_columns("xpath_float", xml, path)
1174811768

@@ -11757,9 +11777,14 @@ def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1175711777

1175811778
Examples
1175911779
--------
11780+
>>> from pyspark.sql import functions as sf
1176011781
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
11761-
>>> df.select(xpath_int(df.x, lit('sum(a/b)')).alias('r')).collect()
11762-
[Row(r=3)]
11782+
>>> df.select(sf.xpath_int(df.x, sf.lit('sum(a/b)'))).show()
11783+
+----------------------+
11784+
|xpath_int(x, sum(a/b))|
11785+
+----------------------+
11786+
|                     3|
11787+
+----------------------+
1176311788
"""
1176411789
return _invoke_function_over_columns("xpath_int", xml, path)
1176511790

@@ -11774,9 +11799,14 @@ def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1177411799

1177511800
Examples
1177611801
--------
11802+
>>> from pyspark.sql import functions as sf
1177711803
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
11778-
>>> df.select(xpath_long(df.x, lit('sum(a/b)')).alias('r')).collect()
11779-
[Row(r=3)]
11804+
>>> df.select(sf.xpath_long(df.x, sf.lit('sum(a/b)'))).show()
11805+
+-----------------------+
11806+
|xpath_long(x, sum(a/b))|
11807+
+-----------------------+
11808+
|                      3|
11809+
+-----------------------+
1178011810
"""
1178111811
return _invoke_function_over_columns("xpath_long", xml, path)
1178211812

@@ -11791,9 +11821,14 @@ def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1179111821

1179211822
Examples
1179311823
--------
11824+
>>> from pyspark.sql import functions as sf
1179411825
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
11795-
>>> df.select(xpath_short(df.x, lit('sum(a/b)')).alias('r')).collect()
11796-
[Row(r=3)]
11826+
>>> df.select(sf.xpath_short(df.x, sf.lit('sum(a/b)'))).show()
11827+
+------------------------+
11828+
|xpath_short(x, sum(a/b))|
11829+
+------------------------+
11830+
|                       3|
11831+
+------------------------+
1179711832
"""
1179811833
return _invoke_function_over_columns("xpath_short", xml, path)
1179911834

@@ -11807,9 +11842,14 @@ def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
1180711842

1180811843
Examples
1180911844
--------
11845+
>>> from pyspark.sql import functions as sf
1181011846
>>> df = spark.createDataFrame([('<a><b>b</b><c>cc</c></a>',)], ['x'])
11811-
>>> df.select(xpath_string(df.x, lit('a/c')).alias('r')).collect()
11812-
[Row(r='cc')]
11847+
>>> df.select(sf.xpath_string(df.x, sf.lit('a/c'))).show()
11848+
+--------------------+
11849+
|xpath_string(x, a/c)|
11850+
+--------------------+
11851+
|                  cc|
11852+
+--------------------+
1181311853
"""
1181411854
return _invoke_function_over_columns("xpath_string", xml, path)
1181511855

0 commit comments

Comments
 (0)