@@ -41,10 +41,10 @@ object SparkSubmit {
41
41
private var clusterManager : Int = LOCAL
42
42
43
43
/**
44
- * A special jar name that indicates the class being run is inside of Spark itself,
45
- * and therefore no user jar is needed.
44
+ * Special primary resource names that represent shells rather than application jars.
46
45
*/
47
- private val RESERVED_JAR_NAME = " spark-internal"
46
+ private val SPARK_SHELL = " spark-shell"
47
+ private val PYSPARK_SHELL = " pyspark-shell"
48
48
49
49
def main (args : Array [String ]) {
50
50
val appArgs = new SparkSubmitArguments (args)
@@ -71,8 +71,8 @@ object SparkSubmit {
71
71
* entries for the child, a list of system properties, a list of env vars
72
72
* and the main class for the child
73
73
*/
74
- private [spark] def createLaunchEnv (args : SparkSubmitArguments ): ( ArrayBuffer [ String ],
75
- ArrayBuffer [String ], Map [String , String ], String ) = {
74
+ private [spark] def createLaunchEnv (args : SparkSubmitArguments )
75
+ : ( ArrayBuffer [ String ], ArrayBuffer [String ], Map [String , String ], String ) = {
76
76
if (args.master.startsWith(" local" )) {
77
77
clusterManager = LOCAL
78
78
} else if (args.master.startsWith(" yarn" )) {
@@ -121,24 +121,30 @@ object SparkSubmit {
121
121
printErrorAndExit(" Cannot currently run driver on the cluster in Mesos" )
122
122
}
123
123
124
- // If we're running a Python app, set the Java class to run to be our PythonRunner, add
125
- // Python files to deployment list, and pass the main file and Python path to PythonRunner
124
+ // If we're running a python app, set the main class to our specific python runner
126
125
if (isPython) {
127
126
if (deployOnCluster) {
128
127
printErrorAndExit(" Cannot currently run Python driver programs on cluster" )
129
128
}
130
- args.mainClass = " org.apache.spark.deploy.PythonRunner"
131
- args.files = mergeFileLists(args.files, args.pyFiles, args.primaryResource)
129
+ if (args.primaryResource == PYSPARK_SHELL ) {
130
+ args.mainClass = " py4j.GatewayServer"
131
+ args.childArgs ++= ArrayBuffer (" --die-on-broken-pipe" , " 0" )
132
+ } else {
133
+ // If a python file is provided, add it to the child arguments and list of files to deploy.
134
+ // Usage: PythonRunner <main python file> <extra python files> [app arguments]
135
+ args.mainClass = " org.apache.spark.deploy.PythonRunner"
136
+ args.childArgs = ArrayBuffer (args.primaryResource, args.pyFiles) ++ args.childArgs
137
+ args.files = Utils .mergeFileLists(args.files, args.primaryResource)
138
+ }
132
139
val pyFiles = Option (args.pyFiles).getOrElse(" " )
133
- args.childArgs = ArrayBuffer (args.primaryResource, pyFiles) ++ args.childArgs
134
- args.primaryResource = RESERVED_JAR_NAME
140
+ args.files = Utils .mergeFileLists(args.files, pyFiles)
135
141
sysProps(" spark.submit.pyFiles" ) = pyFiles
136
142
}
137
143
138
144
// If we're deploying into YARN, use yarn.Client as a wrapper around the user class
139
145
if (! deployOnCluster) {
140
146
childMainClass = args.mainClass
141
- if (args.primaryResource != RESERVED_JAR_NAME ) {
147
+ if (isUserJar( args.primaryResource) ) {
142
148
childClasspath += args.primaryResource
143
149
}
144
150
} else if (clusterManager == YARN ) {
@@ -219,7 +225,7 @@ object SparkSubmit {
219
225
// For python files, the primary resource is already distributed as a regular file
220
226
if (! isYarnCluster && ! isPython) {
221
227
var jars = sysProps.get(" spark.jars" ).map(x => x.split(" ," ).toSeq).getOrElse(Seq ())
222
- if (args.primaryResource != RESERVED_JAR_NAME ) {
228
+ if (isUserJar( args.primaryResource) ) {
223
229
jars = jars ++ Seq (args.primaryResource)
224
230
}
225
231
sysProps.put(" spark.jars" , jars.mkString(" ," ))
@@ -293,8 +299,8 @@ object SparkSubmit {
293
299
}
294
300
295
301
private def addJarToClasspath (localJar : String , loader : ExecutorURLClassLoader ) {
296
- val localJarFile = new File (new URI (localJar).getPath() )
297
- if (! localJarFile.exists() ) {
302
+ val localJarFile = new File (new URI (localJar).getPath)
303
+ if (! localJarFile.exists) {
298
304
printWarning(s " Jar $localJar does not exist, skipping. " )
299
305
}
300
306
@@ -303,14 +309,24 @@ object SparkSubmit {
303
309
}
304
310
305
311
/**
306
- * Merge a sequence of comma-separated file lists, some of which may be null to indicate
307
- * no files, into a single comma-separated string.
312
+ * Return whether the given primary resource represents a user jar.
313
+ */
314
private def isUserJar(primaryResource: String): Boolean = {
  // A primary resource counts as a user jar only when it is neither a shell
  // (per isShell) nor something that requires running python (per isPython).
  val shellOrPython = isShell(primaryResource) || isPython(primaryResource)
  !shellOrPython
}
317
+
318
/**
 * Check whether the given primary resource is one of the reserved shell names
 * (SPARK_SHELL or PYSPARK_SHELL).
 *
 * @param primaryResource the primary resource name to test
 * @return true if the name equals either reserved shell name, false otherwise
 */
private def isShell(primaryResource: String): Boolean = {
  primaryResource match {
    case SPARK_SHELL | PYSPARK_SHELL => true
    case _ => false
  }
}
324
+
325
+ /**
326
+ * Return whether the given primary resource requires running python.
308
327
*/
309
- private [spark] def mergeFileLists (lists : String * ): String = {
310
- val merged = lists.filter(_ != null )
311
- .flatMap(_.split(" ," ))
312
- .mkString(" ," )
313
- if (merged == " " ) null else merged
328
private[spark] def isPython(primaryResource: String): Boolean = {
  // Guard the suffix check: calling endsWith on a null primary resource would throw a
  // NullPointerException, whereas the == comparison below is already null-safe.
  val isPythonFile = primaryResource != null && primaryResource.endsWith(".py")
  // Either a python file or the reserved PySpark shell name requires running python.
  isPythonFile || primaryResource == PYSPARK_SHELL
}
315
331
}
316
332
0 commit comments