Skip to content
This repository has been archived by the owner on Jan 13, 2022. It is now read-only.

Commit

Permalink
OfflineImageDecompressor tool
Browse files Browse the repository at this point in the history
Summary:
This tool decompresses compressed image files so that we can check them for
corruption. As long as we can get through the header fields:
-layout
-namespace id
-numFiles
-genstamp
-txid (optional)

at least syntactically, we should be able to decompress the rest of the image.
This tool is also useful for fixing image corruption, which is easier than
customizing the loading and saving code.

Test Plan:
Tested manually: decompressed a dfsdev image with this tool and parsed the
result with oiv.

Reviewers: hkuang, pritam, weiyan

Reviewed By: hkuang
  • Loading branch information
tomasz authored and Alex Feinberg committed Nov 8, 2012
1 parent 20a5d4a commit 8cd6f78
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 6 deletions.
4 changes: 4 additions & 0 deletions bin/hadoop
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ if [ $# = 0 ]; then
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oev apply the offline edits viewer to an edits file"
echo " oid apply the offline fsimage decompressor to an fsimage"
echo " Use -help to see options"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
Expand Down Expand Up @@ -324,6 +325,9 @@ elif [ "$COMMAND" = "oiv" ] ; then
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "oid" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageDecompressor
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ long getNumTransactions() {
/**
* Stream wrapper that keeps track of the current stream position.
*/
static class PositionTrackingInputStream extends FilterInputStream {
public static class PositionTrackingInputStream extends FilterInputStream {
private long curPos = 0;
private long markPos = -1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,19 @@
import org.apache.hadoop.io.compress.CompressionCodecFactory;

import org.apache.hadoop.io.Text;
import org.mortbay.log.Log;

/**
* Simple container class that handles support for compressed fsimage files.
*/
class FSImageCompression {
public class FSImageCompression {

/** Codec to use to save or load image, or null if the image is not compressed */
private CompressionCodec imageCodec;

public boolean isNoOpCompression() {
return imageCodec == null;
}

/**
* Create a "noop" compression - i.e. uncompressed
*/
Expand All @@ -57,7 +60,7 @@ private FSImageCompression(CompressionCodec codec) {
/**
* Create a "noop" compression - i.e. uncompressed
*/
static FSImageCompression createNoopCompression() {
public static FSImageCompression createNoopCompression() {
return new FSImageCompression();
}

Expand Down Expand Up @@ -104,7 +107,7 @@ private static FSImageCompression createCompression(Configuration conf,
* @throws IOException if the specified codec is not available or the
* underlying IO fails.
*/
static FSImageCompression readCompressionHeader(
public static FSImageCompression readCompressionHeader(
Configuration conf,
DataInputStream dis) throws IOException
{
Expand All @@ -126,7 +129,7 @@ static FSImageCompression readCompressionHeader(
* @throws IOException If the decompressor cannot be instantiated or an IO
* error occurs.
*/
DataInputStream unwrapInputStream(InputStream is) throws IOException {
public DataInputStream unwrapInputStream(InputStream is) throws IOException {
if (imageCodec != null) {
return new DataInputStream(imageCodec.createInputStream(is));
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
import org.apache.hadoop.hdfs.server.namenode.FSImageCompression;

/**
 * OfflineImageDecompressor rewrites a compressed fsimage file as an
 * uncompressed fsimage file. The image header (layout version, namespace id,
 * number of files, generation stamp and, when the layout supports it, the
 * transaction id) is read and written back unchanged, the compression flag is
 * written as "not compressed", and the remainder of the image is copied
 * through the decompressor. Main entry point into the utility is the command
 * line.
 */
public class OfflineImageDecompressor {

  private final static String usage = "Usage: bin/hadoop oid -i INPUTFILE -o OUTPUTFILE\n"
      + "Offline Image Decompressor\n"
      + "The oid utility will attempt to decompress image files.\n"
      + "The tool works offline and does not require a running cluster in\n"
      + "order to process an image file.\n"
      + "Required command line arguments:\n"
      + "-i,--inputFile <arg> FSImage file to process.\n"
      + "-o,--outputFile <arg> Name of output file. If the specified\n"
      + " file exists, it will be overwritten.\n";

  /** Size of the buffer used to copy the decompressed image body (1MB). */
  private static final int COPY_BUFFER_SIZE = 1024 * 1024;

  private final String inputFile;
  private final String outputFile;
  /** Last percentage reported by {@link #printProgress(long, long)}. */
  private int lastProgress = 0;

  /**
   * @param inputFile
   *          path of the compressed fsimage to read
   * @param outputFile
   *          path of the uncompressed fsimage to write
   */
  public OfflineImageDecompressor(String inputFile, String outputFile) {
    this.inputFile = inputFile;
    this.outputFile = outputFile;
  }

  /**
   * Process image file: read the header, and if the image is compressed,
   * write an uncompressed copy of it to the output file. When the image is
   * not compressed nothing is written.
   *
   * @throws IOException
   *           if the header or body cannot be read, or the output cannot be
   *           written
   */
  private void go() throws IOException {
    long start = System.currentTimeMillis();
    System.out.println("Decompressing image file: " + inputFile + " to "
        + outputFile);
    DataInputStream in = null;
    DataOutputStream out = null;

    try {
      // setup in; the position tracker sits below the decompressor so that
      // progress is measured in bytes consumed from the compressed file
      PositionTrackingInputStream ptis = new PositionTrackingInputStream(
          new FileInputStream(new File(inputFile)));
      in = new DataInputStream(ptis);

      // read header information
      int imgVersion = in.readInt();
      if (!LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imgVersion)) {
        System.out
            .println("Image is not compressed. No output will be produced.");
        return;
      }
      int namespaceId = in.readInt();
      long numFiles = in.readLong();
      long genstamp = in.readLong();

      boolean hasTxId = LayoutVersion.supports(Feature.STORED_TXIDS,
          imgVersion);
      long imgTxId = -1;
      if (hasTxId) {
        imgTxId = in.readLong();
      }
      FSImageCompression compression = FSImageCompression
          .readCompressionHeader(new Configuration(), in);
      if (compression.isNoOpCompression()) {
        System.out
            .println("Image is not compressed. No output will be produced.");
        return;
      }
      in = compression.unwrapInputStream(in);
      System.out.println("Starting decompression.");

      // setup output
      out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(
          outputFile)));

      // write back the uncompressed information
      out.writeInt(imgVersion);
      out.writeInt(namespaceId);
      out.writeLong(numFiles);
      out.writeLong(genstamp);
      if (hasTxId) {
        out.writeLong(imgTxId);
      }
      // no compression
      out.writeBoolean(false);

      // copy the data
      long size = new File(inputFile).length();
      byte[] block = new byte[COPY_BUFFER_SIZE];
      while (true) {
        int bytesRead = in.read(block);
        if (bytesRead <= 0) {
          break;
        }
        out.write(block, 0, bytesRead);
        printProgress(ptis.getPos(), size);
      }

      // close explicitly so that flush errors are reported and the size
      // printed below is final; clear the reference to avoid a second close
      out.close();
      out = null;

      long stop = System.currentTimeMillis();
      System.out.println("Input file : " + inputFile + " size: " + size);
      System.out.println("Output file: " + outputFile + " size: "
          + new File(outputFile).length());
      System.out.println("Decompression completed in " + (stop - start)
          + " ms.");
    } finally {
      // nested so that a failure closing the input cannot leak the output
      try {
        if (in != null) {
          in.close();
        }
      } finally {
        if (out != null) {
          out.close();
        }
      }
    }
  }

  /**
   * Print the progress, as a whole percentage, each time it increases.
   *
   * @param read
   *          number of bytes consumed from the input file so far
   * @param size
   *          total size of the input file in bytes
   */
  private void printProgress(long read, long size) {
    if (size <= 0) {
      // nothing meaningful to report and avoids a division by zero
      return;
    }
    int progress = (int) Math.min(100L, (100 * read) / size);
    if (progress > lastProgress) {
      lastProgress = progress;
      System.out.println("Completed " + lastProgress + " % ");
    }
  }

  /**
   * Build command-line options and descriptions
   *
   * @return options accepting a required -i/--inputFile, a required
   *         -o/--outputFile and an optional -h/--help
   */
  public static Options buildOptions() {
    Options options = new Options();

    // Build in/output file arguments, which are required, but there is no
    // addOption method that can specify this. Each takes exactly one value.
    OptionBuilder.isRequired();
    OptionBuilder.hasArg();
    OptionBuilder.withLongOpt("outputFile");
    options.addOption(OptionBuilder.create("o"));

    OptionBuilder.isRequired();
    OptionBuilder.hasArg();
    OptionBuilder.withLongOpt("inputFile");
    options.addOption(OptionBuilder.create("i"));

    options.addOption("h", "help", false, "");
    return options;
  }

  /**
   * Tell whether the raw arguments contain a stand-alone help flag. This is
   * checked before parsing because the parser rejects a command line that
   * lacks the required -i/-o options, which would otherwise turn a plain
   * "-h" into a parse error.
   */
  private static boolean isHelpRequested(String[] args) {
    for (int i = 0; i < args.length; i++) {
      if ("-h".equals(args[i]) || "--help".equals(args[i])) {
        return true;
      }
    }
    return false;
  }

  /**
   * Entry point to command-line-driven operation. User may specify options and
   * start the fsimage decompressor from the command line. Program will process
   * the image file and return or, if an error is encountered, inform the user
   * and return.
   *
   * @param args
   *          Command line options
   * @throws IOException
   *           declared for callers; I/O errors raised while decompressing are
   *           caught here and reported on standard error
   */
  public static void main(String[] args) throws IOException {
    Options options = buildOptions();
    if (args.length == 0 || isHelpRequested(args)) {
      printUsage();
      return;
    }

    CommandLineParser parser = new PosixParser();
    CommandLine cmd;

    try {
      cmd = parser.parse(options, args);
    } catch (ParseException e) {
      System.out.println("Error parsing command-line options: "
          + e.getMessage());
      printUsage();
      return;
    }

    if (cmd.hasOption("h")) { // print help and exit
      printUsage();
      return;
    }

    String inputFile = cmd.getOptionValue("i");
    String outputFile = cmd.getOptionValue("o");

    try {
      OfflineImageDecompressor d = new OfflineImageDecompressor(inputFile,
          outputFile);
      d.go();
    } catch (EOFException e) {
      System.err.println("Input file ended unexpectedly. Exiting");
    } catch (IOException e) {
      System.err.println("Encountered exception. Exiting: " + e.getMessage());
    }
  }

  /**
   * Print application usage instructions.
   */
  private static void printUsage() {
    System.out.println(usage);
  }
}

0 comments on commit 8cd6f78

Please sign in to comment.