|
1285 | 1285 |
|
1286 | 1286 | <property>
|
1287 | 1287 | <name>fs.viewfs.overload.scheme.target.gs.impl</name>
|
1288 |
| - <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value> |
| 1288 | + <value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value> |
1289 | 1289 | <description>The GoogleHadoopFS/Google Cloud Storage file system for view
|
1290 | 1290 | file system overload scheme when child file system and ViewFSOverloadScheme's
|
1291 | 1291 | schemes are gs.
|
@@ -2373,12 +2373,6 @@ The switch to turn S3A auditing on or off.
|
2373 | 2373 | otherwise fall back to hadoop.tmp.dir </description>
|
2374 | 2374 | </property>
|
2375 | 2375 |
|
2376 |
| -<property> |
2377 |
| - <name>fs.AbstractFileSystem.gs.impl</name> |
2378 |
| - <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value> |
2379 |
| - <description>The AbstractFileSystem for gs: uris.</description> |
2380 |
| -</property> |
2381 |
| - |
2382 | 2376 | <property>
|
2383 | 2377 | <name>fs.azure.enable.readahead</name>
|
2384 | 2378 | <value>true</value>
|
@@ -4509,4 +4503,196 @@ The switch to turn S3A auditing on or off.
|
4509 | 4503 | If the value is less than or equal to 0, the cache is disabled entirely.
|
4510 | 4504 | </description>
|
4511 | 4505 | </property>
|
| 4506 | + |
| 4507 | + <property> |
| 4508 | + <name>fs.gs.impl</name> |
| 4509 | + <value>org.apache.hadoop.fs.gs.GoogleHadoopFileSystem</value> |
| 4510 | + <description>The FileSystem for gs: uris.</description> |
| 4511 | + </property> |
| 4512 | + |
| 4513 | + <property> |
| 4514 | + <name>fs.AbstractFileSystem.gs.impl</name> |
| 4515 | + <value>org.apache.hadoop.fs.gs.Gs</value> |
| 4516 | + <description>The AbstractFileSystem for gs: uris.</description> |
| 4517 | + </property> |
| 4518 | + |
| 4519 | + <property> |
| 4520 | + <name>fs.gs.project.id</name> |
| 4521 | + <description> |
| 4522 | + Google Cloud Project ID with access to Google Cloud Storage buckets. |
| 4523 | + Required only for list buckets and create bucket operations. |
| 4524 | + </description> |
| 4525 | + </property> |
| 4526 | + |
| 4527 | + <property> |
| 4528 | + <name>fs.gs.working.dir</name> |
| 4529 | + <value>/</value> |
| 4530 | + <description> |
| 4531 | + The directory that relative gs: uris resolve against inside the default bucket. |
| 4532 | + </description> |
| 4533 | + </property> |
| 4534 | + |
| 4535 | + <property> |
| 4536 | + <name>fs.gs.rewrite.max.chunk.size</name> |
| 4537 | + <value>512m</value> |
| 4538 | + <description> |
| 4539 | + Maximum size of object chunk that will be rewritten in a single rewrite |
| 4540 | + request when fs.gs.copy.with.rewrite.enable is set to true. |
| 4541 | + </description> |
| 4542 | + </property> |
| 4543 | + |
| 4544 | + <property> |
| 4545 | + <name>fs.gs.bucket.delete.enable</name> |
| 4546 | + <value>false</value> |
| 4547 | + <description> |
| 4548 | + If true, recursive delete on a path that refers to a Cloud Storage bucket |
| 4549 | + itself or delete on that path when it is empty will result in deletion of |
| 4550 | + the bucket itself. If false, any operation that normally would have |
| 4551 | + deleted the bucket will be ignored. Setting to false preserves the typical |
| 4552 | + behavior of rm -rf / which translates to deleting everything inside of |
| 4553 | + root, but without clobbering the filesystem authority corresponding to that |
| 4554 | + root path in the process. |
| 4555 | + </description> |
| 4556 | + </property> |
| 4557 | + |
| 4558 | + <property> |
| 4559 | + <name>fs.gs.block.size</name> |
| 4560 | + <value>64m</value> |
| 4561 | + <description> |
| 4562 | + The reported block size of the file system. This does not change any |
| 4563 | + behavior of the connector or the underlying Google Cloud Storage objects. |
| 4564 | + However, it will affect the number of splits Hadoop MapReduce uses for a |
| 4565 | + given input. |
| 4566 | + </description> |
| 4567 | + </property> |
| 4568 | + |
| 4569 | + <property> |
| 4570 | + <name>fs.gs.create.items.conflict.check.enable</name> |
| 4571 | + <value>true</value> |
| 4572 | + <description> |
| 4573 | + Enables a check that ensures that conflicting directories do not exist when |
| 4574 | + creating files and conflicting files do not exist when creating directories. |
| 4575 | + </description> |
| 4576 | + </property> |
| 4577 | + |
| 4578 | + <property> |
| 4579 | + <name>fs.gs.marker.file.pattern</name> |
| 4580 | + <description> |
| 4581 | + If set, files that match specified pattern are copied last during folder |
| 4582 | + rename operation. |
| 4583 | + </description> |
| 4584 | + </property> |
| 4585 | + |
| 4586 | + <property> |
| 4587 | + <name>fs.gs.auth.type</name> |
| 4588 | + <value>COMPUTE_ENGINE</value> |
| 4589 | + <description> |
| 4590 | + What type of authentication mechanism to use for Google Cloud Storage |
| 4591 | + access. Valid values: APPLICATION_DEFAULT, COMPUTE_ENGINE, |
| 4592 | + SERVICE_ACCOUNT_JSON_KEYFILE, UNAUTHENTICATED, USER_CREDENTIALS. |
| 4593 | + </description> |
| 4594 | + </property> |
| 4595 | + |
| 4596 | + <property> |
| 4597 | + <name>fs.gs.auth.service.account.json.keyfile</name> |
| 4598 | + <description> |
| 4599 | + The path to the JSON keyfile for the service account when fs.gs.auth.type |
| 4600 | + property is set to SERVICE_ACCOUNT_JSON_KEYFILE. The file must exist at |
| 4601 | + the same path on all nodes. |
| 4602 | + </description> |
| 4603 | + </property> |
| 4604 | + |
| 4605 | + <property> |
| 4606 | + <name>fs.gs.auth.client.id</name> |
| 4607 | + <description> |
| 4608 | + The OAuth2 client ID. |
| 4609 | + </description> |
| 4610 | + </property> |
| 4611 | + |
| 4612 | + <property> |
| 4613 | + <name>fs.gs.auth.client.secret</name> |
| 4614 | + <description> |
| 4615 | + The OAuth2 client secret. |
| 4616 | + </description> |
| 4617 | + </property> |
| 4618 | + |
| 4619 | + <property> |
| 4620 | + <name>fs.gs.auth.refresh.token</name> |
| 4621 | + <description> |
| 4622 | + The refresh token. |
| 4623 | + </description> |
| 4624 | + </property> |
| 4625 | + |
| 4626 | + <property> |
| 4627 | + <name>fs.gs.inputstream.support.gzip.encoding.enable</name> |
| 4628 | + <value>false</value> |
| 4629 | + <description> |
| 4630 | + If set to false then reading files with GZIP content encoding (HTTP header |
| 4631 | + Content-Encoding: gzip) will result in failure (IOException is thrown). |
| 4632 | + |
| 4633 | + This feature is disabled by default because processing of |
| 4634 | + GZIP encoded files is inefficient and error-prone in Hadoop and Spark. |
| 4635 | + </description> |
| 4636 | + </property> |
| 4637 | + |
| 4638 | + <property> |
| 4639 | + <name>fs.gs.outputstream.buffer.size</name> |
| 4640 | + <value>8m</value> |
| 4641 | + <description> |
| 4642 | + Write buffer size used by the file system API to send the data to be |
| 4643 | + uploaded to Cloud Storage upload thread via pipes. The various pipe types |
| 4644 | + are documented below. |
| 4645 | + </description> |
| 4646 | + </property> |
| 4647 | + |
| 4648 | + <property> |
| 4649 | + <name>fs.gs.outputstream.sync.min.interval</name> |
| 4650 | + <value>0</value> |
| 4651 | + <description> |
| 4652 | + Output stream configuration that controls the minimum interval between |
| 4653 | + consecutive syncs. This helps avoid getting rate-limited by Google Cloud |
| 4654 | + Storage. Default is 0 - no wait between syncs. Note that hflush() will |
| 4655 | + be a no-op if called more frequently than the minimum sync interval and hsync() |
| 4656 | + will block until an end of a min sync interval. |
| 4657 | + </description> |
| 4658 | + </property> |
| 4659 | + |
| 4660 | + <property> |
| 4661 | + <name>fs.gs.inputstream.fadvise</name> |
| 4662 | + <value>AUTO</value> |
| 4663 | + <description> |
| 4664 | + Tunes reading objects behavior to optimize HTTP GET requests for various use |
| 4665 | + cases. Valid values: SEQUENTIAL, RANDOM, AUTO, AUTO_RANDOM. |
| 4666 | + </description> |
| 4667 | + </property> |
| 4668 | + |
| 4669 | + <property> |
| 4670 | + <name>fs.gs.fadvise.request.track.count</name> |
| 4671 | + <value>3</value> |
| 4672 | + <description> |
| 4673 | + Self adaptive fadvise mode uses distance between the served requests to |
| 4674 | + decide the access pattern. This property controls how many such requests |
| 4675 | + need to be tracked. It is used when AUTO_RANDOM is selected. |
| 4676 | + </description> |
| 4677 | + </property> |
| 4678 | + |
| 4679 | + <property> |
| 4680 | + <name>fs.gs.inputstream.inplace.seek.limit</name> |
| 4681 | + <value>8m</value> |
| 4682 | + <description> |
| 4683 | + If forward seeks are within this many bytes of the current position, seeks |
| 4684 | + are performed by reading and discarding bytes in-place rather than opening a |
| 4685 | + new underlying stream. |
| 4686 | + </description> |
| 4687 | + </property> |
| 4688 | + |
| 4689 | + <property> |
| 4690 | + <name>fs.gs.inputstream.min.range.request.size</name> |
| 4691 | + <value>2m</value> |
| 4692 | + <description> |
| 4693 | + Minimum size in bytes of the read range for Cloud Storage request when |
| 4694 | + opening a new stream to read an object. |
| 4695 | + </description> |
| 4696 | + </property> |
| 4697 | + |
4512 | 4698 | </configuration>
|
0 commit comments