@@ -11666,10 +11666,15 @@ def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame(
     ...     [('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>',)], ['x'])
-    >>> df.select(xpath(df.x, lit('a/b/text()')).alias('r')).collect()
-    [Row(r=['b1', 'b2', 'b3'])]
+    >>> df.select(sf.xpath(df.x, sf.lit('a/b/text()'))).show()
+    +--------------------+
+    |xpath(x, a/b/text())|
+    +--------------------+
+    |        [b1, b2, b3]|
+    +--------------------+
     """
     return _invoke_function_over_columns("xpath", xml, path)

@@ -11683,9 +11688,14 @@ def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b></a>',)], ['x'])
-    >>> df.select(xpath_boolean(df.x, lit('a/b')).alias('r')).collect()
-    [Row(r=True)]
+    >>> df.select(sf.xpath_boolean(df.x, sf.lit('a/b'))).show()
+    +---------------------+
+    |xpath_boolean(x, a/b)|
+    +---------------------+
+    |                 true|
+    +---------------------+
     """
     return _invoke_function_over_columns("xpath_boolean", xml, path)

@@ -11700,9 +11710,14 @@ def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
-    >>> df.select(xpath_double(df.x, lit('sum(a/b)')).alias('r')).collect()
-    [Row(r=3.0)]
+    >>> df.select(sf.xpath_double(df.x, sf.lit('sum(a/b)'))).show()
+    +-------------------------+
+    |xpath_double(x, sum(a/b))|
+    +-------------------------+
+    |                      3.0|
+    +-------------------------+
     """
     return _invoke_function_over_columns("xpath_double", xml, path)

@@ -11740,9 +11755,14 @@ def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
-    >>> df.select(xpath_float(df.x, lit('sum(a/b)')).alias('r')).collect()
-    [Row(r=3.0)]
+    >>> df.select(sf.xpath_float(df.x, sf.lit('sum(a/b)'))).show()
+    +------------------------+
+    |xpath_float(x, sum(a/b))|
+    +------------------------+
+    |                     3.0|
+    +------------------------+
     """
     return _invoke_function_over_columns("xpath_float", xml, path)

@@ -11757,9 +11777,14 @@ def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
-    >>> df.select(xpath_int(df.x, lit('sum(a/b)')).alias('r')).collect()
-    [Row(r=3)]
+    >>> df.select(sf.xpath_int(df.x, sf.lit('sum(a/b)'))).show()
+    +----------------------+
+    |xpath_int(x, sum(a/b))|
+    +----------------------+
+    |                     3|
+    +----------------------+
     """
     return _invoke_function_over_columns("xpath_int", xml, path)

@@ -11774,9 +11799,14 @@ def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
-    >>> df.select(xpath_long(df.x, lit('sum(a/b)')).alias('r')).collect()
-    [Row(r=3)]
+    >>> df.select(sf.xpath_long(df.x, sf.lit('sum(a/b)'))).show()
+    +-----------------------+
+    |xpath_long(x, sum(a/b))|
+    +-----------------------+
+    |                      3|
+    +-----------------------+
     """
     return _invoke_function_over_columns("xpath_long", xml, path)

@@ -11791,9 +11821,14 @@ def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
-    >>> df.select(xpath_short(df.x, lit('sum(a/b)')).alias('r')).collect()
-    [Row(r=3)]
+    >>> df.select(sf.xpath_short(df.x, sf.lit('sum(a/b)'))).show()
+    +------------------------+
+    |xpath_short(x, sum(a/b))|
+    +------------------------+
+    |                       3|
+    +------------------------+
     """
     return _invoke_function_over_columns("xpath_short", xml, path)

@@ -11807,9 +11842,14 @@ def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column:

     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([('<a><b>b</b><c>cc</c></a>',)], ['x'])
-    >>> df.select(xpath_string(df.x, lit('a/c')).alias('r')).collect()
-    [Row(r='cc')]
+    >>> df.select(sf.xpath_string(df.x, sf.lit('a/c'))).show()
+    +--------------------+
+    |xpath_string(x, a/c)|
+    +--------------------+
+    |                  cc|
+    +--------------------+
     """
     return _invoke_function_over_columns("xpath_string", xml, path)
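
All eight hunks make the same change: each doctest imports pyspark.sql.functions as sf, calls the function through the sf. prefix, and shows the result as a rendered table instead of a collected Row. For trying the new style outside the doctests, the following is a minimal standalone sketch; it is not part of the patch, it assumes a PySpark release (3.5 or later) where these xpath functions are exposed in pyspark.sql.functions, and the local[1] master and app name are arbitrary choices.

# Standalone sketch, not part of the patch: exercises the doctest style shown above.
# Assumes a local PySpark >= 3.5 installation; master and app name are arbitrary.
from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.master("local[1]").appName("xpath-doctest-demo").getOrCreate()

# xpath() returns every node matching the expression as an array of strings.
df = spark.createDataFrame(
    [('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>',)], ['x'])
df.select(sf.xpath(df.x, sf.lit('a/b/text()'))).show()

# The typed variants coerce the XPath result to a single scalar.
df2 = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x'])
df2.select(
    sf.xpath_boolean(df2.x, sf.lit('a/b')),      # true: the path matches at least one node
    sf.xpath_int(df2.x, sf.lit('sum(a/b)')),     # 3
    sf.xpath_double(df2.x, sf.lit('sum(a/b)')),  # 3.0
    sf.xpath_string(df2.x, sf.lit('a/b')),       # text of the first matching node
).show()

spark.stop()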