Skip to content

Commit f73d13a

Browse files
author
Huaxiang Sun
committed
HDFS-16540 Data locality is lost when DataNode pod restarts in kubernetes
1 parent 3b46aae commit f73d13a

File tree

3 files changed

+45
-1
lines changed

3 files changed

+45
-1
lines changed

.BUILDING.txt.swp

16 KB
Binary file not shown.

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,7 @@ public void registerDatanode(DatanodeRegistration nodeReg)
11711171
nodeN = null;
11721172
}
11731173

1174+
boolean updateHost2DatanodeMap = false;
11741175
if (nodeS != null) {
11751176
if (nodeN == nodeS) {
11761177
// The same datanode has been just restarted to serve the same data
@@ -1189,7 +1190,11 @@ public void registerDatanode(DatanodeRegistration nodeReg)
11891190
nodes with its data cleared (or user can just remove the StorageID
11901191
value in "VERSION" file under the data directory of the datanode,
11911192
but this might not work if VERSION file format has changed
1192-
*/
1193+
*/
1194+
// Check if nodeS's host information is same as nodeReg's, if not,
1195+
// it needs to update host2DatanodeMap accordingly.
1196+
updateHost2DatanodeMap = !nodeS.getXferAddr().equals(nodeReg.getXferAddr());
1197+
11931198
NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
11941199
+ " is replaced by " + nodeReg + " with the same storageID "
11951200
+ nodeReg.getDatanodeUuid());
@@ -1199,6 +1204,11 @@ nodes with its data cleared (or user can just remove the StorageID
11991204
try {
12001205
// update cluster map
12011206
getNetworkTopology().remove(nodeS);
1207+
1208+
// Update Host2DatanodeMap
1209+
if (updateHost2DatanodeMap) {
1210+
getHost2DatanodeMap().remove(nodeS);
1211+
}
12021212
if(shouldCountVersion(nodeS)) {
12031213
decrementVersionCount(nodeS.getSoftwareVersion());
12041214
}
@@ -1217,6 +1227,11 @@ nodes with its data cleared (or user can just remove the StorageID
12171227
nodeS.setDependentHostNames(
12181228
getNetworkDependenciesWithDefault(nodeS));
12191229
}
1230+
1231+
if (updateHost2DatanodeMap) {
1232+
getHost2DatanodeMap().add(nodeS);
1233+
}
1234+
12201235
getNetworkTopology().add(nodeS);
12211236
resolveUpgradeDomain(nodeS);
12221237

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,35 @@ public void testNumVersionsCorrectAfterReregister()
138138
mapToCheck.get("version1").intValue(), 1);
139139
}
140140

141+
/**
142+
* This test checks that if a node is re-registered with a different ip, its
143+
* host2DatanodeMap is correctly updated with the new ip.
144+
*/
145+
@Test
146+
public void testHost2NodeMapCorrectAfterReregister()
147+
throws IOException, InterruptedException {
148+
//Create the DatanodeManager which will be tested
149+
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
150+
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
151+
Configuration conf = new Configuration();
152+
DatanodeManager dm = mockDatanodeManager(fsn, conf);
153+
154+
String storageID = "someStorageID1";
155+
String ipOld = "someIPOld" + storageID;
156+
String ipNew = "someIPNew" + storageID;
157+
158+
dm.registerDatanode(new DatanodeRegistration(
159+
new DatanodeID(ipOld, "", storageID, 9000, 0, 0, 0),
160+
null, null, "version"));
161+
162+
dm.registerDatanode(new DatanodeRegistration(
163+
new DatanodeID(ipNew, "", storageID, 9000, 0, 0, 0),
164+
null, null, "version"));
165+
166+
assertNull("should be no node with old ip", dm.getDatanodeByHost(ipOld));
167+
assertNotNull("should be a node with new ip", dm.getDatanodeByHost(ipNew));
168+
}
169+
141170
/**
142171
* This test sends a random sequence of node registrations and node removals
143172
* to the DatanodeManager (of nodes with different IDs and versions), and

0 commit comments

Comments
 (0)