Commit 52abc9f

HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)

(cherry picked from commit 2efab92)
1 parent 52c6d77

2 files changed: +106 -0 lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
Lines changed: 10 additions & 0 deletions

@@ -2118,6 +2118,16 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block,
       return null;
     }
 
+    // skip if source datanodes for reconstructing ec block are not enough
+    if (block.isStriped()) {
+      BlockInfoStriped stripedBlock = (BlockInfoStriped) block;
+      if (stripedBlock.getRealDataBlockNum() > srcNodes.length) {
+        LOG.debug("Block {} cannot be reconstructed due to shortage of source datanodes ", block);
+        NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
+        return null;
+      }
+    }
+
     // liveReplicaNodes can include READ_ONLY_SHARED replicas which are
     // not included in the numReplicas.liveReplicas() count
     assert liveReplicaNodes.size() >= numReplicas.liveReplicas();
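
The guard reduces to a single comparison: a striped block group needs at least getRealDataBlockNum() live internal blocks as reconstruction inputs, so with fewer eligible source datanodes the work would inevitably fail and is now skipped up front. A minimal, self-contained sketch of that decision (class and method names here are illustrative, not the BlockManager API; only the comparison mirrors the patch):

// Hedged sketch of the decision the new code makes; names are
// illustrative, only the comparison mirrors the patch above.
public final class EcReconstructionGuardSketch {

  // mirrors: stripedBlock.getRealDataBlockNum() > srcNodes.length
  static boolean shouldSkip(int realDataBlockNum, int availableSrcNodes) {
    return realDataBlockNum > availableSrcNodes;
  }

  public static void main(String[] args) {
    // RS-3-2, full block group: 3 data inputs required
    System.out.println(shouldSkip(3, 2)); // true  -> skip (first test below)
    // RS-3-2, group holding only 2 data blocks' worth of bytes
    System.out.println(shouldSkip(2, 2)); // false -> schedule (second test below)
  }
}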

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
Lines changed: 96 additions & 0 deletions

@@ -852,6 +852,102 @@ public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception {
         0, numReplicas.redundantInternalBlocks());
   }
 
+  @Test
+  public void testSkipReconstructionWithManyBusyNodes() {
+    long blockId = -9223372036854775776L; // real ec block id
+    // RS-3-2 EC policy
+    ErasureCodingPolicy ecPolicy =
+        SystemErasureCodingPolicies.getPolicies().get(1);
+
+    // create an EC block group: 3 data blocks + 2 parity blocks
+    Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
+    BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+    // create 4 storageInfo, which means 1 block is missing
+    DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage1", "1.1.1.1", "rack1", "host1");
+    DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage2", "2.2.2.2", "rack2", "host2");
+    DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage3", "3.3.3.3", "rack3", "host3");
+    DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage4", "4.4.4.4", "rack4", "host4");
+
+    // link block with storage
+    aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+    aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+    aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+    aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
+
+    addEcBlockToBM(blockId, ecPolicy);
+    aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+    // reconstruction should be scheduled
+    BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate that 2 nodes reach maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+      ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should be skipped since the number of non-busy nodes is not enough
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNull(work);
+  }
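
For context on why the increment loop above makes ds3 and ds4 ineligible: a datanode whose count of in-flight reconstruction tasks has reached maxReplicationStreams is passed over when sources are chosen, which is what shrinks srcNodes below getRealDataBlockNum(). A small model of that bookkeeping, using assumed names rather than the HDFS API:

// Assumed-name model of source eligibility under maxReplicationStreams;
// not the HDFS implementation, just the counting the test relies on.
public final class BusyNodeModelSketch {

  // a node is a usable source only while it has stream budget left
  static int eligibleSources(int[] pendingPerNode, int maxReplicationStreams) {
    int eligible = 0;
    for (int pending : pendingPerNode) {
      if (pending < maxReplicationStreams) {
        eligible++;
      }
    }
    return eligible;
  }

  public static void main(String[] args) {
    int maxStreams = 2; // assumed value for illustration
    // the four storages from the test; ds3 and ds4 were each
    // incremented maxReplicationStreams times
    int[] pending = {0, 0, maxStreams, maxStreams};
    // 2 eligible sources < realDataBlockNum (3) -> work is skipped
    System.out.println(eligibleSources(pending, maxStreams)); // prints 2
  }
}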
+
+  @Test
+  public void testSkipReconstructionWithManyBusyNodes2() {
+    long blockId = -9223372036854775776L; // real ec block id
+    // RS-3-2 EC policy
+    ErasureCodingPolicy ecPolicy =
+        SystemErasureCodingPolicies.getPolicies().get(1);
+
+    // create an EC block group: 2 data blocks + 2 parity blocks
+    Block aBlockGroup = new Block(blockId,
+        ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0);
+    BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+    // create 3 storageInfo, which means 1 block is missing
+    DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage1", "1.1.1.1", "rack1", "host1");
+    DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage2", "2.2.2.2", "rack2", "host2");
+    DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage3", "3.3.3.3", "rack3", "host3");
+
+    // link block with storage
+    aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+    aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+    aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+
+    addEcBlockToBM(blockId, ecPolicy);
+    aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+    // reconstruction should be scheduled
+    BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate that 1 node reaches maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds2.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should still be scheduled since there are 2 source nodes to create 2 blocks
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate 1 more node reaching maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should be skipped since the number of non-busy nodes is not enough
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNull(work);
+  }
+
   @Test
   public void testFavorDecomUntilHardLimit() throws Exception {
     bm.maxReplicationStreams = 0;
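
The second test still schedules work with only two live sources because the group was created with numBytes = cellSize * (numDataUnits - 1), so only two of the three RS-3-2 data blocks actually hold bytes. A hedged sketch of the arithmetic behind getRealDataBlockNum() for a complete block (the real method in BlockInfoStriped also handles under-construction states, which this sketch ignores):

// Hedged arithmetic sketch: how many data blocks of a striped group
// actually hold bytes. For a complete block this is roughly
// min(numDataUnits, ceil(numBytes / cellSize)).
public final class RealDataBlockNumSketch {

  static int realDataBlockNum(long numBytes, int cellSize, int numDataUnits) {
    if (numBytes == 0) {
      return 0; // empty group: nothing to read
    }
    long fullOrPartialCells = (numBytes - 1) / cellSize + 1; // ceil division
    return (int) Math.min(numDataUnits, fullOrPartialCells);
  }

  public static void main(String[] args) {
    int cellSize = 1024 * 1024; // illustrative cell size
    // first test: numBytes = cellSize * 3 -> needs 3 source nodes
    System.out.println(realDataBlockNum(3L * cellSize, cellSize, 3)); // 3
    // second test: numBytes = cellSize * 2 -> 2 source nodes suffice
    System.out.println(realDataBlockNum(2L * cellSize, cellSize, 3)); // 2
  }
}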
