Skip to content

Commit e6b73d4

Browse files
author
James Lee
committed
add AirportsByLatitudeSolution and AirportsByLatitudeProblem
1 parent 86444bb commit e6b73d4

File tree

6 files changed

+84
-25
lines changed

6 files changed

+84
-25
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.sparkTutorial.rdd.airports;
2+
3+
/**
 * Exercise stub: filter the airport data set by latitude.
 *
 * <p>{@link #main(String[])} is intentionally empty; the task to implement is
 * described in the comment inside it.
 */
public class AirportsByLatitudeProblem {

    /**
     * Entry point of the exercise. Currently does nothing.
     *
     * @param args command-line arguments (unused)
     * @throws Exception declared so that an implementation may propagate failures
     */
    public static void main(String[] args) throws Exception {

        /* Create a Spark program to read the airport data from in/airports.text and find all the airports
           whose latitude is greater than 40.
           Then output the airport's name and the airport's latitude to out/airports_by_latitude.text.

           Each row of the input file contains the following columns:
           Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
           ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

           Sample output:
           "St Anthony", 51.391944
           "Tofino", 49.082222
           ...
         */
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package com.sparkTutorial.rdd.airports;
2+
3+
import com.sparkTutorial.rdd.commons.Utils;
4+
import org.apache.commons.lang.StringUtils;
5+
import org.apache.spark.SparkConf;
6+
import org.apache.spark.api.java.JavaRDD;
7+
import org.apache.spark.api.java.JavaSparkContext;
8+
9+
public class AirportsByLatitudeSolution {
10+
11+
public static void main(String[] args) throws Exception {
12+
13+
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[2]");
14+
15+
JavaSparkContext sc = new JavaSparkContext(conf);
16+
17+
JavaRDD<String> airports = sc.textFile("in/airports.text");
18+
19+
JavaRDD<String> airportsInUSA = airports.filter(line -> Float.valueOf(line.split(Utils.COMMA_DELIMITER)[6]) > 40);
20+
21+
JavaRDD<String> airportsNameAndCityNames = airportsInUSA.map(line -> {
22+
String[] splits = line.split(Utils.COMMA_DELIMITER);
23+
return StringUtils.join(new String[]{splits[1], splits[6]}, ",");
24+
}
25+
);
26+
airportsNameAndCityNames.saveAsTextFile("out/airports_by_latitude.text");
27+
}
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.sparkTutorial.rdd.airports;
2+
3+
/**
 * Exercise stub: select the airports located in the United States.
 *
 * <p>{@link #main(String[])} is intentionally empty; the task to implement is
 * described in the comment inside it.
 */
public class AirportsInUsaProblem {

    /**
     * Entry point of the exercise. Currently does nothing.
     *
     * @param args command-line arguments (unused)
     * @throws Exception declared so that an implementation may propagate failures
     */
    public static void main(String[] args) throws Exception {

        /* Create a Spark program to read the airport data from in/airports.text, find all the airports
           which are located in the United States,
           and output the airport's name and the city's name to out/airports_in_usa.text.

           Each row of the input file contains the following columns:
           Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
           ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

           Sample output:
           "Putnam County Airport", "Greencastle"
           "Dowagiac Municipal Airport", "Dowagiac"
           ...
         */
    }
}
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,28 @@
11
package com.sparkTutorial.rdd.airports;
22

3+
import com.sparkTutorial.rdd.commons.Utils;
34
import org.apache.commons.lang.StringUtils;
45
import org.apache.spark.SparkConf;
56
import org.apache.spark.api.java.JavaRDD;
67
import org.apache.spark.api.java.JavaSparkContext;
78

8-
public class AirportsSolution {
9+
public class AirportsInUsaSolution {
910

1011
public static void main(String[] args) throws Exception {
1112

12-
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[*]");
13+
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[2]");
1314

1415
JavaSparkContext sc = new JavaSparkContext(conf);
1516

1617
JavaRDD<String> airports = sc.textFile("in/airports.text");
1718

18-
JavaRDD<String> airportsInUSA = airports.filter(line -> line.split(",")[3].equals("\"United States\""));
19+
JavaRDD<String> airportsInUSA = airports.filter(line -> line.split(Utils.COMMA_DELIMITER)[3].equals("\"United States\""));
1920

2021
JavaRDD<String> airportsNameAndCityNames = airportsInUSA.map(line -> {
21-
String[] splits = line.split(",");
22+
String[] splits = line.split(Utils.COMMA_DELIMITER);
2223
return StringUtils.join(new String[]{splits[1], splits[2]}, ",");
2324
}
2425
);
25-
airportsNameAndCityNames.saveAsTextFile("out/airports.text");
26+
airportsNameAndCityNames.saveAsTextFile("out/airports_in_usa.text");
2627
}
2728
}

src/main/java/com/sparkTutorial/rdd/airports/AirportsProblem.java

-20
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package com.sparkTutorial.rdd.commons;
2+
3+
/**
 * Shared constants for the RDD tutorial programs.
 *
 * <p>Not instantiable: the only constructor is private.
 */
public final class Utils {

    /**
     * Regular expression that matches a comma only when it is not inside double quotes.
     *
     * <p>The lookahead accepts a comma only if the rest of the input contains an even number
     * of double-quote characters, so {@code 1,"Goroka, Intl","Goroka"} splits into three
     * fields and the quoted comma is left alone. The quotes themselves are kept in the
     * resulting fields.
     */
    public static final String COMMA_DELIMITER = ",(?=([^\"]*\"[^\"]*\")*[^\"]*$)";

    private Utils() {
    }
}

0 commit comments

Comments
 (0)