diff --git a/build.xml b/build.xml index 3d406575..ad0a7f70 100644 --- a/build.xml +++ b/build.xml @@ -27,7 +27,7 @@ - + @@ -203,15 +203,15 @@ - - + + - + @@ -1945,7 +1945,7 @@ @@ -2040,5 +2040,9 @@ + + + + diff --git a/copy-hdfs-jars-to-maven.sh b/copy-hdfs-jars-to-maven.sh index 7ffdf8f1..6ee632d2 100755 --- a/copy-hdfs-jars-to-maven.sh +++ b/copy-hdfs-jars-to-maven.sh @@ -5,52 +5,52 @@ # and test) built in titan/VENDOR/hadoop-0.20/. # +set -e -u -o pipefail BASEDIR=`dirname $0` cd ${BASEDIR} -if [ ! -f build/hadoop-0.20.1-dev-core.jar ]; then - if [ ! -f build/hadoop-0.20-core.jar ]; then - echo "core jar not found. Running 'ant jar'..." - ant jar | grep BUILD; - fi +VERSION=$( ant -q print-version | head -1 | awk '{print $2}' ) +if [ -z "$VERSION" ]; then + echo "Unable to determine Hadoop version" >&2 + exit 1 fi -if [ ! -f build/hadoop-0.20.1-dev-test.jar ]; then - if [ ! -f build/hadoop-0.20-test.jar ]; then - echo "test jar not found. Running 'ant jar-test'..." - ant jar-test | grep BUILD; - fi +TARGETS="" + +CORE_JAR=build/hadoop-$VERSION-core.jar +if [ ! -f $CORE_JAR ]; then + TARGETS="$TARGETS jar" fi +CORE_POM=build/ivy/maven/generated.pom +if [ ! -f $CORE_POM ]; then + TARGETS="$TARGETS makepom" +fi -# -# The names of core/test jar name depend -# on whether they were generated using -# build_all.sh script or just the vanilla -# simple ant jar/jar-test -# -if [ -f build/hadoop-0.20.1-dev-core.jar ]; then - CORE_JAR=build/hadoop-0.20.1-dev-core.jar -else - CORE_JAR=build/hadoop-0.20-core.jar +TEST_JAR=build/hadoop-$VERSION-test.jar +if [ ! -f $TEST_JAR ]; then + TARGETS="$TARGETS jar-test" fi -if [ -f build/hadoop-0.20.1-dev-test.jar ]; then - TEST_JAR=build/hadoop-0.20.1-dev-test.jar -else - TEST_JAR=build/hadoop-0.20-test.jar +if [ -n "$TARGETS" ]; then + ant $TARGETS fi +# Clear the optional flag on Hadoop dependencies so these dependencies can be +# included transitively in other projects. 
+CORE_POM_MODIFIED=$CORE_POM.new +./edit_generated_pom.py >$CORE_POM_MODIFIED + echo "** Publishing hadoop* core & test jars " echo "** to " echo "** your local maven repo (~/.m2/repository). " echo "** HBase builds will pick up the HDFS* jars from the local maven repo." mvn install:install-file \ - -DgeneratePom=true \ + -DpomFile=$CORE_POM_MODIFIED \ -DgroupId=org.apache.hadoop \ -DartifactId=hadoop-core \ - -Dversion=0.20 \ + -Dversion=$VERSION \ -Dpackaging=jar \ -Dfile=${CORE_JAR} @@ -58,6 +58,7 @@ mvn install:install-file \ -DgeneratePom=true \ -DgroupId=org.apache.hadoop \ -DartifactId=hadoop-test \ - -Dversion=0.20 \ + -Dversion=$VERSION \ -Dpackaging=jar \ -Dfile=${TEST_JAR} + diff --git a/edit_generated_pom.py b/edit_generated_pom.py new file mode 100644 index 00000000..c2f88d39 --- /dev/null +++ b/edit_generated_pom.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +''' +Reads the automatically generated Hadoop pom file, removes the "optional" +flag from dependencies so that they could be included transitively into other +projects such as HBase, and removes certain dependencies that are not required +and could even break the code (e.g. an old version of xerces). Writes the +modified project object model XML to standard output. 
+''' + +import os +import re +import sys + +from xml.dom.minidom import parse + +NON_TRANSITIVE_DEPS = [ + # Old version, breaks HBase + 'xerces', + + # Not used in production + 'checkstyle', + 'jdiff', + + # A release audit tool, probably not used in prod + 'rat-lib', +] + +POM_FILE = 'build/ivy/maven/generated.pom' +doc = parse(POM_FILE) +deps = doc.getElementsByTagName('dependencies')[0] + +for dep in deps.getElementsByTagName('dependency'): + for c in dep.childNodes: + if (c.nodeName == 'artifactId' and + c.firstChild and + c.firstChild.nodeValue and + c.firstChild.nodeValue.strip() in NON_TRANSITIVE_DEPS): + deps.removeChild(dep) + break + + for o in dep.getElementsByTagName('optional'): + dep.removeChild(o) + +out_lines = doc.toprettyxml(indent=' ' * 2) +lines = [] +for l in out_lines.split('\n'): + l = l.rstrip() + if l: + lines.append(l) +output = '\n'.join(lines) + +# Make sure values stay on the same line: <tag>value</tag> +output = re.sub( + r'(<([a-zA-Z]+)>)' + r'\s*([^<>]+?)\s*' + r'(</\2>)', r'\1\3\4', output) + +print output +