From c755b0d910d68e7921807f2f2ac1e3fac7a8f357 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 9 Nov 2017 09:22:33 +0100 Subject: [PATCH] [SPARK-22463][YARN][SQL][HIVE] add hadoop/hive/hbase/etc configuration files in SPARK_CONF_DIR to distribute archive ## What changes were proposed in this pull request? When I ran self contained sql apps, such as ```scala import org.apache.spark.sql.SparkSession object ShowHiveTables { def main(args: Array[String]): Unit = { val spark = SparkSession .builder() .appName("Show Hive Tables") .enableHiveSupport() .getOrCreate() spark.sql("show tables").show() spark.stop() } } ``` with **yarn cluster** mode and `hive-site.xml` correctly within `$SPARK_HOME/conf`, they failed to connect to the right hive metastore for not seeing hive-site.xml in AM/Driver's classpath. Although submitting them with `--files/--jars local/path/to/hive-site.xml` or putting it to `$HADOOP_CONF_DIR/YARN_CONF_DIR` can make these apps work well in cluster mode as client mode, according to the official doc, see http://spark.apache.org/docs/latest/sql-programming-guide.html#hive-tables > Configuration of Hive is done by placing your hive-site.xml, core-site.xml (for security configuration), and hdfs-site.xml (for HDFS configuration) file in conf/. We may respect these configuration files too or modify the doc for hive-tables in cluster mode. ## How was this patch tested? cc cloud-fan gatorsmile Author: Kent Yao Closes #19663 from yaooqinn/SPARK-21888. 
--- .../org/apache/spark/deploy/yarn/Client.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 1fe25c4ddaabf..99e7d46ca5c96 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.yarn -import java.io.{File, FileOutputStream, IOException, OutputStreamWriter} +import java.io.{FileSystem => _, _} import java.net.{InetAddress, UnknownHostException, URI} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets @@ -687,6 +687,19 @@ private[spark] class Client( private def createConfArchive(): File = { val hadoopConfFiles = new HashMap[String, File]() + // SPARK_CONF_DIR shows up in the classpath before HADOOP_CONF_DIR/YARN_CONF_DIR + sys.env.get("SPARK_CONF_DIR").foreach { localConfDir => + val dir = new File(localConfDir) + if (dir.isDirectory) { + val files = dir.listFiles(new FileFilter { + override def accept(pathname: File): Boolean = { + pathname.isFile && pathname.getName.endsWith(".xml") + } + }) + files.foreach { f => hadoopConfFiles(f.getName) = f } + } + } + Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").foreach { envKey => sys.env.get(envKey).foreach { path => val dir = new File(path)