Skip to content

Commit 06eb138

Browse files
committed
Use shlex instead of writing our own parser
1 parent 05879fa commit 06eb138

File tree

1 file changed

+2
-28
lines changed

1 file changed

+2
-28
lines changed

python/pyspark/java_gateway.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import os
1919
import sys
2020
import signal
21+
import shlex
2122
import platform
2223
from subprocess import Popen, PIPE
2324
from threading import Thread
@@ -35,7 +36,7 @@ def launch_gateway():
3536
on_windows = platform.system() == "Windows"
3637
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
3738
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
38-
submit_args = split_preserve_quotes(submit_args)
39+
submit_args = shlex.split(submit_args)
3940
command = [os.path.join(SPARK_HOME, script), "pyspark-shell"] + submit_args
4041
if not on_windows:
4142
# Don't send ctrl-c / SIGINT to the Java gateway:
@@ -76,30 +77,3 @@ def run(self):
7677
java_import(gateway.jvm, "scala.Tuple2")
7778

7879
return gateway
79-
80-
def split_preserve_quotes(args):
    """
    Given a string of space-delimited arguments with quotes,
    split it into a list while preserving the quote boundaries.

    Only double quotes are recognised, and only when the opening quote is
    the first character of a space-delimited token and the closing quote is
    the last character of a token. The surrounding quotes are stripped from
    the resulting argument; spaces inside the quotes are kept verbatim.

    :param args: the raw argument string, or None
    :return: a list of argument strings; empty if `args` is None or blank
    """
    if args is None:
        return []
    split_list = []
    # Tokens of the quoted argument currently being assembled, or None when
    # we are not inside a quoted argument.
    quoted_parts = None
    for arg in args.split(" "):
        if quoted_parts is None:
            if not arg:
                # Splitting on a single space yields empty tokens for
                # leading, trailing or repeated spaces; outside of quotes
                # these are not real arguments.
                continue
            if not arg.startswith("\""):
                split_list.append(arg)
                continue
            quoted_parts = [arg]
            # A token such as "app" opens and closes the quote by itself.
            # A lone quote character can only open, never close.
            closed = len(arg) > 1 and arg.endswith("\"")
        else:
            # Inside quotes every token is kept, including empty ones, so
            # that runs of spaces are restored exactly by the join below.
            quoted_parts.append(arg)
            closed = arg.endswith("\"")
        if closed:
            # Strip quotes
            split_list.append(" ".join(quoted_parts)[1:-1])
            quoted_parts = None
    if quoted_parts is not None:
        # Unterminated quote: keep the text (minus the opening quote)
        # instead of silently dropping the rest of the arguments.
        split_list.append(" ".join(quoted_parts)[1:])
    return split_list

0 commit comments

Comments
 (0)