
Commit 8cbb5a7

顾鹏 authored and committed
HDFS-17223. Add journalnode maintenance node list
1 parent 4c04a67 commit 8cbb5a7

9 files changed: +140 −9 lines changed


hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 4 additions & 0 deletions
@@ -1466,6 +1466,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.journalnode.edit-cache-size.fraction";
   public static final float DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT = 0.5f;
 
+  public static final String DFS_JOURNALNODE_MAINTENANCE_NODES_KEY =
+      "dfs.journalnode.maintenance.nodes";
+  public static final String[] DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT = {};
+
   // Journal-node related configs for the client side.
   public static final String DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY = "dfs.qjournal.queued-edits.limit.mb";
   public static final int DFS_QJOURNAL_QUEUE_SIZE_LIMIT_DEFAULT = 10;
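
For reference, the new key can also be set programmatically; a minimal sketch (the hostname is a placeholder, not a value from this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

// Minimal sketch: mark one JournalNode as under maintenance. The value is a
// comma-separated host:port list; the empty default leaves the feature off.
Configuration conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, "jn1.example.com:8485");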

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

Lines changed: 25 additions & 0 deletions
@@ -48,6 +48,7 @@
 import java.net.UnknownHostException;
 
 import java.security.SecureRandom;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
@@ -70,6 +71,7 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
+import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
 import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
@@ -1982,4 +1984,27 @@ public static void addTransferRateMetric(final DataNodeMetrics metrics, final lo
       LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
     }
   }
+
+  /**
+   * Build a HostSet of InetSocketAddresses from an array of host:port strings.
+   * @param nodesHostPort host:port string array
+   */
+  public static HostSet getInetSocketAddress(String[] nodesHostPort) {
+    HostSet retSet = new HostSet();
+    for (String hostPort : nodesHostPort) {
+      try {
+        URI uri = new URI("dummy", hostPort, null, null, null);
+        int port = uri.getPort() == -1 ? 0 : uri.getPort();
+        InetSocketAddress inetSocketAddress = new InetSocketAddress(uri.getHost(), port);
+        if (inetSocketAddress.isUnresolved()) {
+          LOG.warn(String.format("Failed to resolve address `%s`", hostPort));
+          continue;
+        }
+        retSet.add(inetSocketAddress);
+      } catch (URISyntaxException e) {
+        LOG.warn(String.format("Failed to parse `%s`", hostPort));
+      }
+    }
+    return retSet;
+  }
 }
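
A small usage sketch of the new helper (not part of the commit; it assumes a resolvable host, since unresolvable entries are dropped with a warning):

import java.net.InetSocketAddress;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;

// Build a HostSet from host:port strings and test an address against it.
// An entry without an explicit port is stored with port 0, which match() treats as "any port".
String[] maintenance = {"localhost:8485"};
HostSet skip = DFSUtil.getInetSocketAddress(maintenance);
boolean skipped = skip.match(new InetSocketAddress("localhost", 8485)); // true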

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java

Lines changed: 8 additions & 1 deletion
@@ -53,9 +53,16 @@ class AsyncLoggerSet {
 
   private static final long INVALID_EPOCH = -1;
   private long myEpoch = INVALID_EPOCH;
+  private final int majoritySize;
 
   public AsyncLoggerSet(List<AsyncLogger> loggers) {
     this.loggers = ImmutableList.copyOf(loggers);
+    this.majoritySize = loggers.size() / 2 + 1;
+  }
+
+  public AsyncLoggerSet(List<AsyncLogger> loggers, int majoritySize) {
+    this.loggers = ImmutableList.copyOf(loggers);
+    this.majoritySize = majoritySize;
   }
 
   void setEpoch(long e) {
@@ -151,7 +158,7 @@ <V> Map<AsyncLogger, V> waitForWriteQuorum(QuorumCall<AsyncLogger, V> q,
    * @return the number of nodes which are required to obtain a quorum.
    */
   int getMajoritySize() {
-    return loggers.size() / 2 + 1;
+    return this.majoritySize;
   }
 
   /**
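
The commit does not show a caller of the two-argument constructor, but its apparent purpose is to let the quorum size be pinned to the full configured JournalNode count even when some loggers are skipped; a hedged sketch (createActiveLoggers is a hypothetical helper, not part of the patch):

import java.util.List;

// With 5 configured JournalNodes and 2 listed as maintenance nodes, only 3 loggers
// exist. The one-argument constructor would compute 3 / 2 + 1 = 2 required acks, but
// a safe write quorum over 5 configured nodes needs 5 / 2 + 1 = 3, so a caller can
// pass the majority size explicitly.
List<AsyncLogger> activeLoggers = createActiveLoggers(); // hypothetical: 3 loggers
AsyncLoggerSet loggerSet = new AsyncLoggerSet(activeLoggers, 5 / 2 + 1);
assert loggerSet.getMajoritySize() == 3;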

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java

Lines changed: 62 additions & 3 deletions
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hdfs.qjournal.client;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY;
+
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URI;
@@ -31,6 +34,7 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
+import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
 import org.apache.hadoop.util.Lists;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -62,6 +66,7 @@
 import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.thirdparty.com.google.common.base.Joiner;
 import org.apache.hadoop.util.Preconditions;
+
 import org.apache.hadoop.thirdparty.protobuf.TextFormat;
 
 /**
@@ -108,6 +113,7 @@ public class QuorumJournalManager implements JournalManager {
   private static final int OUTPUT_BUFFER_CAPACITY_DEFAULT = 512 * 1024;
   private int outputBufferCapacity;
   private final URLConnectionFactory connectionFactory;
+  private int quorumJournalCount;
 
   /** Limit logging about input stream selection to every 5 seconds max. */
   private static final long SELECT_INPUT_STREAM_LOG_INTERVAL_MS = 5000;
@@ -146,6 +152,13 @@ public QuorumJournalManager(Configuration conf,
     this.nameServiceId = nameServiceId;
     this.loggers = new AsyncLoggerSet(createLoggers(loggerFactory));
 
+    // Check that enough JournalNodes remain after excluding the maintenance nodes.
+    int quorumThreshold = (quorumJournalCount / 2) + 1;
+    Preconditions.checkArgument(
+        this.loggers.size() >= quorumThreshold,
+        "The number of JournalNodes left after excluding %s must be greater than or equal to"
+            + " %s!", DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, quorumThreshold);
+
     this.maxTxnsPerRpc =
         conf.getInt(QJM_RPC_MAX_TXNS_KEY, QJM_RPC_MAX_TXNS_DEFAULT);
     Preconditions.checkArgument(maxTxnsPerRpc > 0,
@@ -250,6 +263,9 @@ Map<AsyncLogger, NewEpochResponseProto> createNewUniqueEpoch()
 
   @Override
   public void format(NamespaceInfo nsInfo, boolean force) throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("format() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.format(nsInfo, force);
     try {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -406,21 +422,39 @@ private void recoverUnclosedSegment(long segmentTxId) throws IOException {
         logToSync.getStartTxId(),
         logToSync.getEndTxId()));
   }
-
-  static List<AsyncLogger> createLoggers(Configuration conf,
+
+  List<AsyncLogger> createLoggers(Configuration conf,
+      URI uri,
+      NamespaceInfo nsInfo,
+      AsyncLogger.Factory factory,
+      String nameServiceId)
+      throws IOException {
+    String[] skipNodesHostPort = conf.getTrimmedStrings(
+        DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT);
+    return createLoggers(conf, uri, nsInfo, factory, nameServiceId, skipNodesHostPort);
+  }
+
+  private List<AsyncLogger> createLoggers(Configuration conf,
       URI uri,
       NamespaceInfo nsInfo,
      AsyncLogger.Factory factory,
-      String nameServiceId)
+      String nameServiceId,
+      String[] skipNodesHostPort)
       throws IOException {
     List<AsyncLogger> ret = Lists.newArrayList();
     List<InetSocketAddress> addrs = Util.getAddressesList(uri, conf);
     if (addrs.size() % 2 == 0) {
       LOG.warn("Quorum journal URI '" + uri + "' has an even number " +
           "of Journal Nodes specified. This is not recommended!");
     }
+    this.quorumJournalCount = addrs.size();
+    HostSet skipSet = DFSUtil.getInetSocketAddress(skipNodesHostPort);
     String jid = parseJournalId(uri);
     for (InetSocketAddress addr : addrs) {
+      if (skipSet.match(addr)) {
+        LOG.info("The node {} is a maintenance node and will skip initialization.", addr);
+        continue;
+      }
       ret.add(factory.createLogger(conf, nsInfo, jid, nameServiceId, addr));
     }
     return ret;
@@ -667,6 +701,9 @@ AsyncLoggerSet getLoggerSetForTests() {
 
   @Override
   public void doPreUpgrade() throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("doPreUpgrade() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -684,6 +721,9 @@ public void doPreUpgrade() throws IOException {
 
   @Override
   public void doUpgrade(Storage storage) throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("doUpgrade() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
     try {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -701,6 +741,9 @@ public void doUpgrade(Storage storage) throws IOException {
 
   @Override
   public void doFinalize() throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("doFinalize() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -719,6 +762,9 @@ public void doFinalize() throws IOException {
   @Override
   public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
       int targetLayoutVersion) throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("canRollBack() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
         prevStorage, targetLayoutVersion);
     try {
@@ -753,6 +799,9 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
 
   @Override
   public void doRollback() throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("doRollback() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -770,6 +819,9 @@ public void doRollback() throws IOException {
 
   @Override
   public void discardSegments(long startTxId) throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("discardSegments() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
@@ -789,6 +841,9 @@ public void discardSegments(long startTxId) throws IOException {
 
   @Override
   public long getJournalCTime() throws IOException {
+    if (isEnableJnMaintenance()) {
+      throw new IOException("getJournalCTime() does not support enabling jn maintenance mode");
+    }
     QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
@@ -819,4 +874,8 @@ public long getJournalCTime() throws IOException {
 
     throw new AssertionError("Unreachable code.");
   }
+
+  private boolean isEnableJnMaintenance() {
+    return this.loggers.size() < quorumJournalCount;
+  }
 }
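
To make the constructor check and the isEnableJnMaintenance() guard concrete, a worked example with illustrative values (not from the patch):

// 3 JournalNodes in the qjournal URI, 2 of them listed in dfs.journalnode.maintenance.nodes:
int quorumJournalCount = 3;                        // total JNs parsed from the URI
int activeLoggers = quorumJournalCount - 2;        // loggers created after skipping, i.e. 1
int quorumThreshold = quorumJournalCount / 2 + 1;  // 2
boolean valid = activeLoggers >= quorumThreshold;  // false -> the Preconditions check fails and
                                                   // the constructor throws IllegalArgumentException
// With only 1 maintenance node, activeLoggers == 2 >= 2, so construction succeeds and
// isEnableJnMaintenance() returns true (2 < 3), which blocks format(), upgrade, rollback
// and similar calls that wait for responses from every logger.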

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java

Lines changed: 5 additions & 5 deletions
@@ -45,7 +45,7 @@ public class HostSet implements Iterable<InetSocketAddress> {
    * The function that checks whether there exists an entry foo in the set
    * so that foo &lt;= addr.
    */
-  boolean matchedBy(InetSocketAddress addr) {
+  public boolean matchedBy(InetSocketAddress addr) {
     Collection<Integer> ports = addrs.get(addr.getAddress());
     return addr.getPort() == 0 ? !ports.isEmpty() : ports.contains(addr
         .getPort());
@@ -55,23 +55,23 @@ boolean matchedBy(InetSocketAddress addr) {
    * The function that checks whether there exists an entry foo in the set
    * so that addr &lt;= foo.
    */
-  boolean match(InetSocketAddress addr) {
+  public boolean match(InetSocketAddress addr) {
     int port = addr.getPort();
     Collection<Integer> ports = addrs.get(addr.getAddress());
     boolean exactMatch = ports.contains(port);
     boolean genericMatch = ports.contains(0);
     return exactMatch || genericMatch;
   }
 
-  boolean isEmpty() {
+  public boolean isEmpty() {
     return addrs.isEmpty();
   }
 
-  int size() {
+  public int size() {
     return addrs.size();
   }
 
-  void add(InetSocketAddress addr) {
+  public void add(InetSocketAddress addr) {
     Preconditions.checkArgument(!addr.isUnresolved());
     addrs.put(addr.getAddress(), addr.getPort());
   }
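
With these members now public, callers outside the package (such as DFSUtil above) can use HostSet directly; a short sketch of the port-0 wildcard behaviour (addresses are illustrative):

import java.net.InetSocketAddress;
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;

HostSet set = new HostSet();
set.add(new InetSocketAddress("127.0.0.1", 0));                         // port 0 acts as "any port"
boolean anyPort = set.match(new InetSocketAddress("127.0.0.1", 8485));  // true (generic match)
boolean other = set.match(new InetSocketAddress("127.0.0.2", 8485));    // false (address not in set)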

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 14 additions & 0 deletions
@@ -6333,6 +6333,20 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.journalnode.maintenance.nodes</name>
+  <value></value>
+  <description>
+    If one out of three JournalNodes is down, the service can theoretically continue.
+    In practice, however, the downed node may not recover quickly, and if the NameNode
+    has to be restarted it will keep retrying the downed JournalNode through the lengthy
+    RPC retry mechanism, so the NameNode takes a long time to start serving. Adding the
+    downed JournalNode to the maintenance node list shortens NameNode initialization in
+    such scenarios.
+  </description>
+</property>
+
+
 <property>
   <name>dfs.namenode.lease-hard-limit-sec</name>
   <value>1200</value>
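
On a running cluster the setting would normally go into hdfs-site.xml on the NameNodes; a minimal sketch with a placeholder hostname (with three JournalNodes, at most one may be listed, otherwise QuorumJournalManager construction fails):

<property>
  <name>dfs.journalnode.maintenance.nodes</name>
  <value>jn3.example.com:8485</value>
</property>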

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/QJMTestUtil.java

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ public abstract class QJMTestUtil {
   public static final NamespaceInfo FAKE_NSINFO = new NamespaceInfo(
       12345, "mycluster", "my-bp", 0L);
   public static final String JID = "test-journal";
+  public static final String FAKE_HOSTNAME = "jn";
 
   public static byte[] createTxnData(int startTxn, int numTxns) throws Exception {
     DataOutputBuffer buf = new DataOutputBuffer();

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestIPCLoggerChannel.java

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.qjournal.client;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY;
 import static org.junit.Assert.*;
 
 import java.io.IOException;

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java

Lines changed: 20 additions & 0 deletions
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hdfs.qjournal.client;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_RPC_PORT_DEFAULT;
+import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.FAKE_HOSTNAME;
 import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.FAKE_NSINFO;
 import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.JID;
 import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.verifyEdits;
@@ -27,6 +30,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.ArgumentMatchers.eq;
@@ -39,6 +43,7 @@
 import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.StringJoiner;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -1171,6 +1176,21 @@ public void testSelectViaRpcAfterJNRestart() throws Exception {
     }
   }
 
+  /**
+   * Tests that an exception is thrown if the number of JournalNode maintenance nodes
+   * exceeds half of the JournalNodes.
+   */
+  @Test
+  public void testJNBlackListViaRpcTwoJNsError() throws Exception {
+    StringJoiner blackListBuff = new StringJoiner(",");
+    for (int i = 0; i < 2; i++) {
+      blackListBuff.add(String.format("%s%d:%d",
+          FAKE_HOSTNAME, i, DFS_JOURNALNODE_RPC_PORT_DEFAULT));
+    }
+    this.conf.set(DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, blackListBuff.toString());
+    assertThrows(IllegalArgumentException.class, () -> createSpyingQJM());
+  }
+
+
   @Test
   public void testGetJournalAddressListWithResolution() throws Exception {
     Configuration configuration = new Configuration();
