From 2e30072f3b387c7d8f0cb38651c7292c7ffaa13e Mon Sep 17 00:00:00 2001 From: sdong <> Date: Mon, 5 Nov 2012 18:02:10 -0800 Subject: [PATCH] HDFS: Fix DFSClient's memory leak on OutputStreams hold by FileChecker Summary: Currently, OutputStreams are not guaranteed to be removed from DFSClient.filechecker, if OutStream.close() fails. So if clients give it up after the close() failure (which is the recommandation), the OutputStream object is leaked to FileChecker.pendingCreates forever. This patch tries to fix the issue by always removing files from filechecker when OutputStream.close() is called. Also, the file is removed from the map when recoverLease() is called Test Plan: ant test Reviewers: hkuang, tomasz, weiyan Reviewed By: hkuang Task ID: 1862779 --- .../org/apache/hadoop/hdfs/DFSClient.java | 3 ++ .../apache/hadoop/hdfs/DFSOutputStream.java | 43 +++++++++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java b/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java index e4aef4b2..16fcad90 100644 --- a/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java +++ b/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java @@ -954,6 +954,9 @@ public OutputStream create(String src, */ boolean recoverLease(String src, boolean discardLastBlock) throws IOException { checkOpen(); + // We remove the file from local lease checker. Usually it is already been + // removed by the client but we want to be extra safe. + leasechecker.remove(src); if (this.namenodeProtocolProxy == null) { return versionBasedRecoverLease(src); diff --git a/src/hdfs/org/apache/hadoop/hdfs/DFSOutputStream.java b/src/hdfs/org/apache/hadoop/hdfs/DFSOutputStream.java index f60d6231..080e4296 100644 --- a/src/hdfs/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/src/hdfs/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -1657,27 +1657,36 @@ private void waitForAckedSeqno(long seqnumToWaitFor) throws IOException { */ @Override public void close() throws IOException { - if (closed) { - IOException e = lastException; - if (e == null) - return; - else - throw e; - } - try { - closeInternal(); - dfsClient.leasechecker.remove(src); + if (closed) { + IOException e = lastException; + if (e == null) + return; + else + throw e; + } + + try { + closeInternal(); - if (s != null) { - for (int i = 0; i < s.length; i++) { - s[i].close(); + if (s != null) { + for (int i = 0; i < s.length; i++) { + s[i].close(); + } + s = null; } - s = null; + } catch (IOException e) { + lastException = e; + throw e; } - } catch (IOException e) { - lastException = e; - throw e; + } finally { + // We always try to remove the connection from the lease to + // avoid memory leak. In case of failed close(), it is possible + // that later users' retry of close() could succeed but fail on + // lease expiration. Since clients don't possibly write more data + // after calling close(), this case doesn't change any guarantee + // of data itself. + dfsClient.leasechecker.remove(src); } }