Skip to content

Commit ba3a887

Browse files
committed
HADOOP-19343: Add hadoop-gcp configuration to core-default.xml and ServiceLoader file.
Closes #7916 Signed-off-by: Shilun Fan <[email protected]>
1 parent 63f5493 commit ba3a887

File tree

4 files changed

+215
-15
lines changed

4 files changed

+215
-15
lines changed

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Lines changed: 193 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,7 @@
12851285

12861286
<property>
12871287
<name>fs.viewfs.overload.scheme.target.gs.impl</name>
1288-
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
1288+
<value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
12891289
<description>The GoogleHadoopFileSystem/Google Cloud Storage file system for view
12901290
file system overload scheme when child file system and ViewFSOverloadScheme's
12911291
schemes are gs.
@@ -2373,12 +2373,6 @@ The switch to turn S3A auditing on or off.
23732373
otherwise fall back to hadoop.tmp.dir </description>
23742374
</property>
23752375

2376-
<property>
2377-
<name>fs.AbstractFileSystem.gs.impl</name>
2378-
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
2379-
<description>The AbstractFileSystem for gs: uris.</description>
2380-
</property>
2381-
23822376
<property>
23832377
<name>fs.azure.enable.readahead</name>
23842378
<value>true</value>
@@ -4509,4 +4503,196 @@ The switch to turn S3A auditing on or off.
45094503
If the value is less than or equal to 0, the cache is disabled entirely.
45104504
</description>
45114505
</property>
4506+
4507+
<property>
4508+
<name>fs.gs.impl</name>
4509+
<value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
4510+
<description>The FileSystem for gs: uris.</description>
4511+
</property>
4512+
4513+
<property>
4514+
<name>fs.AbstractFileSystem.gs.impl</name>
4515+
<value>org.apache.hadoop.fs.gs.Gs</value>
4516+
<description>The AbstractFileSystem for gs: uris.</description>
4517+
</property>
4518+
4519+
<property>
4520+
<name>fs.gs.project.id</name>
4521+
<description>
4522+
Google Cloud Project ID with access to Google Cloud Storage buckets.
4523+
Required only for list buckets and create bucket operations.
4524+
</description>
4525+
</property>
4526+
4527+
<property>
4528+
<name>fs.gs.working.dir</name>
4529+
<value>/</value>
4530+
<description>
4531+
The directory that relative gs: uris resolve against inside the default bucket.
4532+
</description>
4533+
</property>
4534+
4535+
<property>
4536+
<name>fs.gs.rewrite.max.chunk.size</name>
4537+
<value>512m</value>
4538+
<description>
4539+
Maximum size of object chunk that will be rewritten in a single rewrite
4540+
request when fs.gs.copy.with.rewrite.enable is set to true.
4541+
</description>
4542+
</property>
4543+
4544+
<property>
4545+
<name>fs.gs.bucket.delete.enable</name>
4546+
<value>false</value>
4547+
<description>
4548+
If true, recursive delete on a path that refers to a Cloud Storage bucket
4549+
itself or delete on that path when it is empty will result in deletion of
4550+
the bucket itself. If false, any operation that normally would have
4551+
deleted the bucket will be ignored. Setting to false preserves the typical
4552+
behavior of rm -rf / which translates to deleting everything inside of
4553+
root, but without clobbering the filesystem authority corresponding to that
4554+
root path in the process.
4555+
</description>
4556+
</property>
4557+
4558+
<property>
4559+
<name>fs.gs.block.size</name>
4560+
<value>64m</value>
4561+
<description>
4562+
The reported block size of the file system. This does not change any
4563+
behavior of the connector or the underlying Google Cloud Storage objects.
4564+
However, it will affect the number of splits Hadoop MapReduce uses for a
4565+
given input.
4566+
</description>
4567+
</property>
4568+
4569+
<property>
4570+
<name>fs.gs.create.items.conflict.check.enable</name>
4571+
<value>true</value>
4572+
<description>
4573+
Enables a check that ensures that conflicting directories do not exist when
4574+
creating files and conflicting files do not exist when creating directories.
4575+
</description>
4576+
</property>
4577+
4578+
<property>
4579+
<name>fs.gs.marker.file.pattern</name>
4580+
<description>
4581+
If set, files that match the specified pattern are copied last during folder
4582+
rename operation.
4583+
</description>
4584+
</property>
4585+
4586+
<property>
4587+
<name>fs.gs.auth.type</name>
4588+
<value>COMPUTE_ENGINE</value>
4589+
<description>
4590+
What type of authentication mechanism to use for Google Cloud Storage
4591+
access. Valid values: APPLICATION_DEFAULT, COMPUTE_ENGINE,
4592+
SERVICE_ACCOUNT_JSON_KEYFILE, UNAUTHENTICATED, USER_CREDENTIALS.
4593+
</description>
4594+
</property>
4595+
4596+
<property>
4597+
<name>fs.gs.auth.service.account.json.keyfile</name>
4598+
<description>
4599+
The path to the JSON keyfile for the service account when fs.gs.auth.type
4600+
property is set to SERVICE_ACCOUNT_JSON_KEYFILE. The file must exist at
4601+
the same path on all nodes.
4602+
</description>
4603+
</property>
4604+
4605+
<property>
4606+
<name>fs.gs.auth.client.id</name>
4607+
<description>
4608+
The OAuth2 client ID.
4609+
</description>
4610+
</property>
4611+
4612+
<property>
4613+
<name>fs.gs.auth.client.secret</name>
4614+
<description>
4615+
The OAuth2 client secret.
4616+
</description>
4617+
</property>
4618+
4619+
<property>
4620+
<name>fs.gs.auth.refresh.token</name>
4621+
<description>
4622+
The refresh token.
4623+
</description>
4624+
</property>
4625+
4626+
<property>
4627+
<name>fs.gs.inputstream.support.gzip.encoding.enable</name>
4628+
<value>false</value>
4629+
<description>
4630+
If set to false then reading files with GZIP content encoding (HTTP header
4631+
Content-Encoding: gzip) will result in failure (IOException is thrown).
4632+
4633+
This feature is disabled by default because processing of
4634+
GZIP encoded files is inefficient and error-prone in Hadoop and Spark.
4635+
</description>
4636+
</property>
4637+
4638+
<property>
4639+
<name>fs.gs.outputstream.buffer.size</name>
4640+
<value>8m</value>
4641+
<description>
4642+
Write buffer size used by the file system API to send the data to be
4643+
uploaded to Cloud Storage upload thread via pipes. The various pipe types
4644+
are documented below.
4645+
</description>
4646+
</property>
4647+
4648+
<property>
4649+
<name>fs.gs.outputstream.sync.min.interval</name>
4650+
<value>0</value>
4651+
<description>
4652+
Output stream configuration that controls the minimum interval between
4653+
consecutive syncs. This helps avoid getting rate-limited by Google Cloud
4654+
Storage. Default is 0 - no wait between syncs. Note that hflush() will
4655+
be a no-op if called more frequently than the minimum sync interval and hsync()
4656+
will block until the end of the minimum sync interval.
4657+
</description>
4658+
</property>
4659+
4660+
<property>
4661+
<name>fs.gs.inputstream.fadvise</name>
4662+
<value>AUTO</value>
4663+
<description>
4664+
Tunes object-read behavior to optimize HTTP GET requests for various use
4665+
cases. Valid values: SEQUENTIAL, RANDOM, AUTO, AUTO_RANDOM.
4666+
</description>
4667+
</property>
4668+
4669+
<property>
4670+
<name>fs.gs.fadvise.request.track.count</name>
4671+
<value>3</value>
4672+
<description>
4673+
Self-adaptive fadvise mode uses the distance between the served requests to
4674+
decide the access pattern. This property controls how many such requests
4675+
need to be tracked. It is used when AUTO_RANDOM is selected.
4676+
</description>
4677+
</property>
4678+
4679+
<property>
4680+
<name>fs.gs.inputstream.inplace.seek.limit</name>
4681+
<value>8m</value>
4682+
<description>
4683+
If forward seeks are within this many bytes of the current position, seeks
4684+
are performed by reading and discarding bytes in-place rather than opening a
4685+
new underlying stream.
4686+
</description>
4687+
</property>
4688+
4689+
<property>
4690+
<name>fs.gs.inputstream.min.range.request.size</name>
4691+
<value>2m</value>
4692+
<description>
4693+
Minimum size in bytes of the read range for a Cloud Storage request when
4694+
opening a new stream to read an object.
4695+
</description>
4696+
</property>
4697+
45124698
</configuration>

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ public void initializeMemberVariables() {
149149
xmlPropsToSkipCompare.add("fs.azure.saskey.usecontainersaskeyforallaccess");
150150
xmlPropsToSkipCompare.add("fs.azure.user.agent.prefix");
151151

152+
// GS properties are in a different class
153+
// - org.apache.hadoop.fs.gs.GoogleHadoopFileSystemConfiguration
154+
xmlPrefixToSkipCompare.add("gs.");
155+
xmlPrefixToSkipCompare.add("fs.gs.");
156+
xmlPropsToSkipCompare.add("fs.AbstractFileSystem.gs.impl");
157+
152158
// Properties in enable callqueue overflow trigger failover for stateless servers.
153159
xmlPropsToSkipCompare.add("ipc.[port_number].callqueue.overflow.trigger.failover");
154160
xmlPropsToSkipCompare.add("ipc.callqueue.overflow.trigger.failover");
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
org.apache.hadoop.fs.gs.GoogleHadoopFileSystem

hadoop-tools/hadoop-gcp/src/test/resources/core-site.xml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,6 @@
3333
<name>hadoop.security.authentication</name>
3434
<value>simple</value>
3535
</property>
36-
<property>
37-
<name>fs.gs.impl</name>
38-
<value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value>
39-
</property>
40-
<property>
41-
<name>fs.AbstractFileSystem.gs.impl</name>
42-
<value>org.apache.hadoop.fs.gs.Gs</value>
43-
</property>
4436

4537
<!--
4638
To run these tests.

0 commit comments

Comments
 (0)