From eb7d5cd48a1de49a6edb85e160caf0012bfc0ca5 Mon Sep 17 00:00:00 2001
From: mbautin <>
Date: Tue, 24 Jul 2012 12:42:05 -0700
Subject: [PATCH] Include dependencies with Hadoop when publishing jar to maven
Summary: As external dependencies are added to Hadoop, we also have to add them to HBase, because the Hadoop .pom file we publish to the local Maven repository does not contain dependencies. Here I am trying to use the makepom target to auto-generate those dependencies from ivy. Also, I am setting Hadoop version to 0.20. I will double-check that this works correctly with build_all.sh in the warehouse branch.
Test Plan:
In Hadoop directory:
ant clean
copy-hdfs-jars-to-maven.sh
In HBase directory:
Build HBase
In the warehouse branch checkout:
./build_all.sh --hadoop=true
Reviewers: pritam, avf, weiyan, sdong, dms
Reviewed By: pritam
---
build.xml | 14 +++++----
copy-hdfs-jars-to-maven.sh | 55 +++++++++++++++++-----------------
edit_generated_pom.py | 60 ++++++++++++++++++++++++++++++++++++++
3 files changed, 97 insertions(+), 32 deletions(-)
create mode 100644 edit_generated_pom.py
diff --git a/build.xml b/build.xml
index 3d406575..ad0a7f70 100644
--- a/build.xml
+++ b/build.xml
@@ -27,7 +27,7 @@
-
+
@@ -203,15 +203,15 @@
-
-
+
+
-
+
@@ -1945,7 +1945,7 @@
@@ -2040,5 +2040,9 @@
+
+
+
+
diff --git a/copy-hdfs-jars-to-maven.sh b/copy-hdfs-jars-to-maven.sh
index 7ffdf8f1..6ee632d2 100755
--- a/copy-hdfs-jars-to-maven.sh
+++ b/copy-hdfs-jars-to-maven.sh
@@ -5,52 +5,52 @@
# and test) built in titan/VENDOR/hadoop-0.20/.
#
+set -e -u -o pipefail
BASEDIR=`dirname $0`
cd ${BASEDIR}
-if [ ! -f build/hadoop-0.20.1-dev-core.jar ]; then
- if [ ! -f build/hadoop-0.20-core.jar ]; then
- echo "core jar not found. Running 'ant jar'..."
- ant jar | grep BUILD;
- fi
+VERSION=$( ant -q print-version | head -1 | awk '{print $2}' )
+if [ -z "$VERSION" ]; then
+ echo "Unable to determine Hadoop version" >&2
+ exit 1
fi
-if [ ! -f build/hadoop-0.20.1-dev-test.jar ]; then
- if [ ! -f build/hadoop-0.20-test.jar ]; then
- echo "test jar not found. Running 'ant jar-test'..."
- ant jar-test | grep BUILD;
- fi
+TARGETS=""
+
+CORE_JAR=build/hadoop-$VERSION-core.jar
+if [ ! -f $CORE_JAR ]; then
+ TARGETS="$TARGETS jar"
fi
+CORE_POM=build/ivy/maven/generated.pom
+if [ ! -f $CORE_POM ]; then
+ TARGETS="$TARGETS makepom"
+fi
-#
-# The names of core/test jar name depend
-# on whether they were generated using
-# build_all.sh script or just the vanilla
-# simple ant jar/jar-test
-#
-if [ -f build/hadoop-0.20.1-dev-core.jar ]; then
- CORE_JAR=build/hadoop-0.20.1-dev-core.jar
-else
- CORE_JAR=build/hadoop-0.20-core.jar
+TEST_JAR=build/hadoop-$VERSION-test.jar
+if [ ! -f $TEST_JAR ]; then
+ TARGETS="$TARGETS jar-test"
fi
-if [ -f build/hadoop-0.20.1-dev-test.jar ]; then
- TEST_JAR=build/hadoop-0.20.1-dev-test.jar
-else
- TEST_JAR=build/hadoop-0.20-test.jar
+if [ -n "$TARGETS" ]; then
+ ant $TARGETS
fi
+# Drop the optional flag from Hadoop dependencies so other projects include
+# them transitively. The script is mode 100644, so invoke it via python.
+CORE_POM_MODIFIED=$CORE_POM.new
+python edit_generated_pom.py >"$CORE_POM_MODIFIED"
+
echo "** Publishing hadoop* core & test jars "
echo "** to "
echo "** your local maven repo (~/.m2/repository). "
echo "** HBase builds will pick up the HDFS* jars from the local maven repo."
mvn install:install-file \
- -DgeneratePom=true \
+ -DpomFile=$CORE_POM_MODIFIED \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-core \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${CORE_JAR}
@@ -58,6 +58,7 @@ mvn install:install-file \
-DgeneratePom=true \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-test \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${TEST_JAR}
+
diff --git a/edit_generated_pom.py b/edit_generated_pom.py
new file mode 100644
index 00000000..c2f88d39
--- /dev/null
+++ b/edit_generated_pom.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+'''
+Reads the automatically generated Hadoop pom file, removes the "optional"
+flag from dependencies so that they could be included transitively into other
+projects such as HBase, and removes certain dependencies that are not required
+and could even break the code (e.g. an old version of xerces). Writes the
+modified project object model XML to standard output.
+'''
+
+import os
+import re
+import sys
+
+from xml.dom.minidom import parse
+
+# artifactIds stripped from the pom so that dependents do not inherit them.
+NON_TRANSITIVE_DEPS = [
+    # Old version, breaks HBase
+    'xerces',
+
+    # Not used in production
+    'checkstyle',
+    'jdiff',
+
+    # A release audit tool, probably not used in prod
+    'rat-lib',
+]
+
+# Relative to the cwd; must match CORE_POM in copy-hdfs-jars-to-maven.sh.
+POM_FILE = 'build/ivy/maven/generated.pom'
+doc = parse(POM_FILE)
+deps = doc.getElementsByTagName('dependencies')[0]
+
+# The returned NodeList is a snapshot, so removing nodes mid-loop is safe.
+for dep in deps.getElementsByTagName('dependency'):
+    for c in dep.childNodes:
+        if (c.nodeName == 'artifactId' and
+                c.firstChild and
+                c.firstChild.nodeValue and
+                c.firstChild.nodeValue.strip() in NON_TRANSITIVE_DEPS):
+            dep.parentNode.removeChild(dep)
+            break
+
+    for o in dep.getElementsByTagName('optional'):
+        # The lookup is recursive, so detach from the node's real parent.
+        o.parentNode.removeChild(o)
+
+# Pretty-print, then drop blank lines and trailing whitespace.
+pretty = doc.toprettyxml(indent=' ' * 2)
+stripped = (line.rstrip() for line in pretty.split('\n'))
+output = '\n'.join(line for line in stripped if line)
+
+# Make sure values stay on the same line: <tag>value</tag>
+output = re.sub(
+    r'(<([a-zA-Z]+)>)'
+    r'\s*([^<>]+?)\s*'
+    r'(</\2>)', r'\1\3\4', output)
+# write() instead of the print statement: runs under both Python 2 and 3.
+sys.stdout.write(output + '\n')