-
Notifications
You must be signed in to change notification settings - Fork 9.1k
HADOOP-14837 : Support Read Restored Glacier Objects #6407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from all commits
403f50f
6110c7c
de6d1b0
04548bb
863a5db
c152dc2
c1c72d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,6 +113,7 @@ | |
import org.apache.hadoop.fs.Options; | ||
import org.apache.hadoop.fs.impl.OpenFileParameters; | ||
import org.apache.hadoop.fs.permission.FsAction; | ||
import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter; | ||
import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; | ||
import org.apache.hadoop.fs.s3a.auth.SignerManager; | ||
import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations; | ||
|
@@ -444,6 +445,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, | |
*/ | ||
private boolean isCSEEnabled; | ||
|
||
/** | ||
* {@link S3ObjectStorageClassFilter} will filter the S3 files based on the | ||
* {@code fs.s3a.glacier.read.restored.objects} configuration. | ||
*/ | ||
private S3ObjectStorageClassFilter s3ObjectStorageClassFilter; | ||
|
||
/** | ||
* Bucket AccessPoint. | ||
*/ | ||
|
@@ -585,6 +592,18 @@ public void initialize(URI name, Configuration originalConf) | |
|
||
s3aInternals = createS3AInternals(); | ||
|
||
try { | ||
s3ObjectStorageClassFilter = Optional.of(conf.getTrimmed(READ_RESTORED_GLACIER_OBJECTS, | ||
DEFAULT_READ_RESTORED_GLACIER_OBJECTS)) | ||
.map(String::toUpperCase) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Locale.ROOT) |
||
.map(S3ObjectStorageClassFilter::valueOf).get(); | ||
} catch (IllegalArgumentException e) { | ||
LOG.warn("Invalid value for the config {} is set. Valid values are:" + | ||
"READ_ALL, SKIP_ALL_GLACIER, READ_RESTORED_GLACIER_OBJECTS. Defaulting to READ_ALL", | ||
READ_RESTORED_GLACIER_OBJECTS); | ||
s3ObjectStorageClassFilter = S3ObjectStorageClassFilter.READ_ALL; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's fall back to the default. Maybe pull the conf.getTrimmed() out of the try {} so its value can be printed too. FWIW in #6789 I'm doing a |
||
} | ||
|
||
// look for encryption data | ||
// DT Bindings may override this | ||
setEncryptionSecrets( | ||
|
@@ -5686,6 +5705,7 @@ public StoreContext createStoreContext() { | |
.setContextAccessors(new ContextAccessorsImpl()) | ||
.setAuditor(getAuditor()) | ||
.setEnableCSE(isCSEEnabled) | ||
.setS3ObjectStorageClassFilter(s3ObjectStorageClassFilter) | ||
.build(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hadoop.fs.s3a.api; | ||
|
||
import java.util.Set; | ||
import java.util.function.Function; | ||
|
||
import software.amazon.awssdk.services.s3.model.ObjectStorageClass; | ||
import software.amazon.awssdk.services.s3.model.S3Object; | ||
|
||
import org.apache.hadoop.fs.s3a.S3AFileSystem; | ||
import org.apache.hadoop.util.Sets; | ||
|
||
|
||
/** | ||
* <pre> | ||
* {@link S3ObjectStorageClassFilter} will filter the S3 files based on the | ||
* {@code fs.s3a.glacier.read.restored.objects} configuration set in {@link S3AFileSystem}. | ||
* The config can have 3 values: | ||
* {@code READ_ALL}: Retrieval of Glacier files will fail with InvalidObjectStateException: | ||
* The operation is not valid for the object's storage class. | ||
* {@code SKIP_ALL_GLACIER}: If this value is set then this will ignore any S3 Objects which are | ||
* tagged with Glacier storage classes and retrieve the others. | ||
* {@code READ_RESTORED_GLACIER_OBJECTS}: If this value is set then restored status of the Glacier | ||
* object will be checked, if restored the objects would be read like normal S3 objects | ||
* else they will be ignored as the objects would not have been retrieved from the S3 Glacier. | ||
* </pre> | ||
*/ | ||
public enum S3ObjectStorageClassFilter { | ||
READ_ALL(o -> true), | ||
SKIP_ALL_GLACIER(S3ObjectStorageClassFilter::isNotGlacierObject), | ||
READ_RESTORED_GLACIER_OBJECTS(S3ObjectStorageClassFilter::isCompletedRestoredObject); | ||
|
||
private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES = Sets.newHashSet( | ||
ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE); | ||
|
||
private final Function<S3Object, Boolean> filter; | ||
steveloughran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
S3ObjectStorageClassFilter(Function<S3Object, Boolean> filter) { | ||
steveloughran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this.filter = filter; | ||
} | ||
|
||
/** | ||
* Checks if the s3 object is not an object with a storage class of glacier/deep_archive. | ||
* @param object s3 object | ||
* @return if the s3 object is not an object with a storage class of glacier/deep_archive | ||
*/ | ||
private static boolean isNotGlacierObject(S3Object object) { | ||
return !GLACIER_STORAGE_CLASSES.contains(object.storageClass()); | ||
} | ||
|
||
/** | ||
* Checks if the s3 object is an object with a storage class of glacier/deep_archive. | ||
* @param object s3 object | ||
* @return if the s3 object is an object with a storage class of glacier/deep_archive | ||
*/ | ||
private static boolean isGlacierObject(S3Object object) { | ||
return GLACIER_STORAGE_CLASSES.contains(object.storageClass()); | ||
} | ||
|
||
/** | ||
* Checks if the s3 object is completely restored. | ||
* @param object s3 object | ||
* @return if the s3 object is completely restored | ||
*/ | ||
private static boolean isCompletedRestoredObject(S3Object object) { | ||
if(isGlacierObject(object)) { | ||
return object.restoreStatus() != null && !object.restoreStatus().isRestoreInProgress(); | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
* Returns the filter function set as part of the enum definition. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needs a trailing . |
||
* @return the filter function set as part of the enum definition | ||
*/ | ||
public Function<S3Object, Boolean> getFilter() { | ||
return filter; | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,13 +26,13 @@ | |
import java.util.concurrent.ExecutorService; | ||
|
||
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; | ||
|
||
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; | ||
import org.apache.hadoop.classification.InterfaceAudience; | ||
import org.apache.hadoop.classification.InterfaceStability; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.fs.s3a.api.RequestFactory; | ||
import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter; | ||
import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; | ||
import org.apache.hadoop.fs.s3a.Invoker; | ||
import org.apache.hadoop.fs.s3a.S3AFileStatus; | ||
|
@@ -117,6 +117,8 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> { | |
/** Is client side encryption enabled? */ | ||
private final boolean isCSEEnabled; | ||
|
||
/**
 * Filter of S3 objects by storage class, whose value is set according to the
 * config {@code fs.s3a.glacier.read.restored.objects}.
 */
private final S3ObjectStorageClassFilter s3ObjectStorageClassFilter; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: javadocs |
||
|
||
/** | ||
* Instantiate. | ||
*/ | ||
|
@@ -137,7 +139,8 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> { | |
final boolean useListV1, | ||
final ContextAccessors contextAccessors, | ||
final AuditSpanSource<AuditSpanS3A> auditor, | ||
final boolean isCSEEnabled) { | ||
final boolean isCSEEnabled, | ||
final S3ObjectStorageClassFilter s3ObjectStorageClassFilter) { | ||
this.fsURI = fsURI; | ||
this.bucket = bucket; | ||
this.configuration = configuration; | ||
|
@@ -158,6 +161,7 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> { | |
this.contextAccessors = contextAccessors; | ||
this.auditor = auditor; | ||
this.isCSEEnabled = isCSEEnabled; | ||
this.s3ObjectStorageClassFilter = s3ObjectStorageClassFilter; | ||
} | ||
|
||
public URI getFsURI() { | ||
|
@@ -411,4 +415,13 @@ public RequestFactory getRequestFactory() { | |
public boolean isCSEEnabled() { | ||
return isCSEEnabled; | ||
} | ||
|
||
/** | ||
* Return the S3ObjectStorageClassFilter object for S3A, | ||
* whose value is set according to the config {@code fs.s3a.glacier.read.restored.objects}. | ||
* @return {@link S3ObjectStorageClassFilter} object | ||
*/ | ||
public S3ObjectStorageClassFilter getS3ObjectsStorageClassFilter() { | ||
return s3ObjectStorageClassFilter; | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.