diff --git a/build.xml b/build.xml
index 3d406575..ad0a7f70 100644
--- a/build.xml
+++ b/build.xml
@@ -27,7 +27,7 @@
-
+
@@ -203,15 +203,15 @@
-
-
+
+
-
+
@@ -1945,7 +1945,7 @@
@@ -2040,5 +2040,9 @@
+
+
+
+
diff --git a/copy-hdfs-jars-to-maven.sh b/copy-hdfs-jars-to-maven.sh
index 7ffdf8f1..6ee632d2 100755
--- a/copy-hdfs-jars-to-maven.sh
+++ b/copy-hdfs-jars-to-maven.sh
@@ -5,52 +5,52 @@
# and test) built in titan/VENDOR/hadoop-0.20/.
#
+set -e -u -o pipefail
BASEDIR=`dirname $0`
cd ${BASEDIR}
-if [ ! -f build/hadoop-0.20.1-dev-core.jar ]; then
- if [ ! -f build/hadoop-0.20-core.jar ]; then
- echo "core jar not found. Running 'ant jar'..."
- ant jar | grep BUILD;
- fi
+VERSION=$( ant -q print-version | head -1 | awk '{print $2}' )
+if [ -z "$VERSION" ]; then
+ echo "Unable to determine Hadoop version" >&2
+ exit 1
fi
-if [ ! -f build/hadoop-0.20.1-dev-test.jar ]; then
- if [ ! -f build/hadoop-0.20-test.jar ]; then
- echo "test jar not found. Running 'ant jar-test'..."
- ant jar-test | grep BUILD;
- fi
+TARGETS=""
+
+CORE_JAR=build/hadoop-$VERSION-core.jar
+if [ ! -f $CORE_JAR ]; then
+ TARGETS="$TARGETS jar"
fi
+CORE_POM=build/ivy/maven/generated.pom
+if [ ! -f $CORE_POM ]; then
+ TARGETS="$TARGETS makepom"
+fi
-#
-# The names of core/test jar name depend
-# on whether they were generated using
-# build_all.sh script or just the vanilla
-# simple ant jar/jar-test
-#
-if [ -f build/hadoop-0.20.1-dev-core.jar ]; then
- CORE_JAR=build/hadoop-0.20.1-dev-core.jar
-else
- CORE_JAR=build/hadoop-0.20-core.jar
+TEST_JAR=build/hadoop-$VERSION-test.jar
+if [ ! -f $TEST_JAR ]; then
+ TARGETS="$TARGETS jar-test"
fi
-if [ -f build/hadoop-0.20.1-dev-test.jar ]; then
- TEST_JAR=build/hadoop-0.20.1-dev-test.jar
-else
- TEST_JAR=build/hadoop-0.20-test.jar
+if [ -n "$TARGETS" ]; then
+ ant $TARGETS
fi
+# Rewrite the generated POM into $CORE_POM.new: edit_generated_pom.py clears the
+# "optional" flag so Hadoop's dependencies are included transitively elsewhere.
+CORE_POM_MODIFIED=$CORE_POM.new
+./edit_generated_pom.py >$CORE_POM_MODIFIED
+
echo "** Publishing hadoop* core & test jars "
echo "** to "
echo "** your local maven repo (~/.m2/repository). "
echo "** HBase builds will pick up the HDFS* jars from the local maven repo."
mvn install:install-file \
- -DgeneratePom=true \
+ -DpomFile=$CORE_POM_MODIFIED \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-core \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${CORE_JAR}
@@ -58,6 +58,7 @@ mvn install:install-file \
-DgeneratePom=true \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-test \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${TEST_JAR}
+
diff --git a/edit_generated_pom.py b/edit_generated_pom.py
new file mode 100644
index 00000000..c2f88d39
--- /dev/null
+++ b/edit_generated_pom.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+'''
+Reads the automatically generated Hadoop pom file, removes the "optional"
+flag from dependencies so that they could be included transitively into other
+projects such as HBase, and removes certain dependencies that are not required
+and could even break the code (e.g. an old version of xerces). Writes the
+modified project object model XML to standard output.
+'''
+
+import os
+import re
+import sys
+
+from xml.dom.minidom import parse
+
+# artifactIds of the dependencies that are dropped from the pom altogether.
+NON_TRANSITIVE_DEPS = [
+    # Old version, breaks HBase
+    'xerces',
+
+    # Not used in production
+    'checkstyle',
+    'jdiff',
+
+    # A release audit tool, probably not used in prod
+    'rat-lib',
+]
+
+# Path is relative to the current working directory.
+POM_FILE = 'build/ivy/maven/generated.pom'
+doc = parse(POM_FILE)
+# Only the first <dependencies> element of the document is edited.
+deps = doc.getElementsByTagName('dependencies')[0]
+
+# Loop over a copy of the node list because matching nodes are detached below.
+for dep in list(deps.getElementsByTagName('dependency')):
+    for c in dep.childNodes:
+        if (c.nodeName == 'artifactId' and
+                c.firstChild and
+                c.firstChild.nodeValue and
+                c.firstChild.nodeValue.strip() in NON_TRANSITIVE_DEPS):
+            # removeChild() only accepts a direct child, so detach the node
+            # from its actual parent rather than assuming that it is `deps`.
+            dep.parentNode.removeChild(dep)
+            break
+    else:
+        # The dependency is kept: clear its "optional" flag.
+        for o in dep.getElementsByTagName('optional'):
+            o.parentNode.removeChild(o)
+
+# Pretty-print, then drop empty and whitespace-only lines from the result.
+out_lines = doc.toprettyxml(indent=' ' * 2)
+output = '\n'.join(
+    line.rstrip() for line in out_lines.split('\n') if line.strip())
+
+# Make sure values stay on the same line: <tag>value</tag>
+output = re.sub(
+    r'(<([a-zA-Z]+)>)'
+    r'\s*([^<>]+?)\s*'
+    r'(</\2>)', r'\1\3\4', output)
+
+# Emit UTF-8 bytes explicitly: this runs unchanged on Python 2 and 3 and does
+# not depend on the locale when stdout is redirected to a file.
+stream = getattr(sys.stdout, 'buffer', sys.stdout)
+stream.write(output.encode('utf-8') + b'\n')
+