I get the following error while running the code:
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/sql/execution/datasources/hbase/HBaseTableCatalog$
at com.bde.datalake.test.Prepickup$.main(Prepickup.scala:135)
at com.bde.datalake.test.Prepickup.main(Prepickup.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog$
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 12 more
I have added the shc jar, the --packages coordinate, and the repository to the spark-submit command.
Hello,
I am running into a problem when writing to HBase from Spark using the shc jar.
Spark: 2.1.0
HBase (on the cluster): 1.2.0
spark-submit command:
spark2-submit --master yarn --deploy-mode client \
  --files /etc/hbase/conf/hbase-site.xml \
  --class com.bde.datalake.test.Prepickup \
  /home/datalake/tt/bde_datalake_2.11-1.0.jar \
  --jars /home/datalake/tt/shc-core-1.1.0-2.1-s_2.11.jar \
  --conf "spark.driver.extraClassPath=/home/datalake/tt/shc-core-1.1.0-2.1-s_2.11.jar" \
  --packages com.hortonworks:shc:1.1.0-2.1-s_2.11 \
  --repositories http://repo.hortonworks.com/content/repositories/releases/
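(Worth noting: spark-submit treats everything after the application jar as arguments to the main class, so in the command above the --jars, --conf, --packages, and --repositories flags may never reach spark-submit itself. For comparison, here is a sketch of the same command with every flag moved before the jar; the paths are unchanged, and shc-core is used as the --packages artifact, since com.hortonworks:shc appears to be the parent POM rather than the jar itself:)
spark2-submit --master yarn --deploy-mode client \
  --files /etc/hbase/conf/hbase-site.xml \
  --jars /home/datalake/tt/shc-core-1.1.0-2.1-s_2.11.jar \
  --conf "spark.driver.extraClassPath=/home/datalake/tt/shc-core-1.1.0-2.1-s_2.11.jar" \
  --packages com.hortonworks:shc-core:1.1.0-2.1-s_2.11 \
  --repositories http://repo.hortonworks.com/content/repositories/releases/ \
  --class com.bde.datalake.test.Prepickup \
  /home/datalake/tt/bde_datalake_2.11-1.0.jar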
Following is my code:
package com.bde.datalake.test

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog
import org.apache.spark.storage._
import org.apache.hadoop.hbase.{ TableName, HBaseConfiguration }
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.{ Result, Put }

object Prepickup {

  val spark = SparkSession
    .builder()
    .appName("CT_Spark_parellel_test")
    .enableHiveSupport()
    .getOrCreate()

  import spark.implicits._

  // Resource settings; note that options such as driver and executor memory
  // are read at application startup, so setting them here, after the session
  // already exists, has no effect on the running JVM.
  spark.sql("set spark.dynamicAllocation.enabled = true")
  spark.sql("set spark.shuffle.service.enabled = true")
  spark.sql("set spark.dynamicAllocation.minExecutors = 10")
  spark.sql("set spark.dynamicAllocation.maxExecutors = 25")
  spark.sql("set spark.executor.cores = 5")
  spark.sql("set spark.driver.memory = 15g")
  spark.sql("set spark.executor.memory = 30g")
  spark.sql("set spark.driver.extraClassPath = /fae/conf/hadoop/hbase-site.xml")

  def main(args: Array[String]) = {
    // prepickupDF1 (the DataFrame to write) and catalog1 (the SHC catalog
    // JSON) are defined elsewhere in the full Prepickup.scala; this snippet
    // is trimmed. HBaseTableCatalog.newTable -> "5" asks SHC to create the
    // table with 5 regions if it does not already exist.
    prepickupDF1.write
      .options(Map(HBaseTableCatalog.tableCatalog -> catalog1, HBaseTableCatalog.newTable -> "5"))
      .format("org.apache.spark.sql.execution.datasources.hbase")
      .save()
  }
}
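Since catalog1 and prepickupDF1 are defined elsewhere in the full file, here is a minimal sketch of what an SHC catalog for this write could look like; the namespace, table name, and column layout below are placeholders, not the real definitions from Prepickup.scala:
// Hypothetical catalog: HBase table "default:prepickup" with a string row key
// and a single column in column family "cf1". All names are placeholders.
def catalog1 = """{
  |"table":{"namespace":"default", "name":"prepickup"},
  |"rowkey":"key",
  |"columns":{
  |"id":{"cf":"rowkey", "col":"key", "type":"string"},
  |"value":{"cf":"cf1", "col":"value", "type":"string"}
  |}
  |}""".stripMargin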
build.sbt
name := "bde_datalake"
version := "1.0"
scalaVersion in ThisBuild := "2.11.8"

val sparkVersion = "2.1.0"

dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7.1"
dependencyOverrides += "com.fasterxml.jackson.module" % "jackson-module-scala_2.11" % "2.6.7.1"

unmanagedJars in Compile += file("lib/shc-core-1.1.0-2.1-s_2.11.jar")

resolvers += "SparkPackages" at "https://dl.bintray.com/spark-packages/maven/"
resolvers += Resolver.url("bintray-sbt-plugins", url("http://dl.bintray.com/sbt/sbt-plugin-releases"))
resolvers += "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.kafka" % "kafka-clients" % "0.8.2.0",
  "org.apache.spark" %% "spark-streaming-kafka" % "1.6.0",
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  "org.apache.spark" %% "spark-hive" % sparkVersion,
  "org.apache.hadoop" % "hadoop-common" % "2.6.0",
  //"org.apache.hbase" % "hbase-spark" % "1.2.0-cdh5.13.1",
  "org.apache.hbase" % "hbase-common" % "1.1.2",
  "org.apache.hbase" % "hbase-client" % "1.1.2",
  "org.apache.hbase" % "hbase-server" % "1.1.2",
  "org.scala-lang" % "scala-library" % "2.11.8",
  "org.scala-lang" % "scala-reflect" % "2.11.8",
  "edu.stanford.nlp" % "stanford-corenlp" % "3.7.0",
  "edu.stanford.nlp" % "stanford-corenlp" % "3.7.0" % "test" classifier "models",
  "databricks" % "spark-corenlp" % "0.2.0-s_2.11"
)

resolvers ++= Seq(
  "Hortonworks Releases" at "http://repo.hortonworks.com/content/repositories/releases/",
  "Java.net Maven2 Repository" at "http://download.java.net/maven/2/"
)

fork in run := true
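One more thing I am unsure about: unmanagedJars only puts shc-core on the compile classpath, so the class is not packaged into bde_datalake_2.11-1.0.jar and has to be supplied by spark-submit at runtime. A sketch of pulling it as a managed dependency instead, assuming the Hortonworks repository declared above actually serves the artifact (shc-core, not the parent shc):
// Replaces the unmanagedJars line; resolved from the Hortonworks repository.
libraryDependencies += "com.hortonworks" % "shc-core" % "1.1.0-2.1-s_2.11"
A fat jar built with sbt-assembly would then carry HBaseTableCatalog inside the application jar itself.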
Running this, I get the same java.lang.NoClassDefFoundError: org/apache/spark/sql/execution/datasources/hbase/HBaseTableCatalog$ with the stack trace quoted at the top of this issue.
I have already added the shc jar, the --packages coordinate, and the repository to the spark-submit command, as shown above.