Skip to content

Commit

Permalink
Adding Spark 3.2 Connector (GoogleCloudDataproc#860)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrabinowitz authored Jan 13, 2023
1 parent 1b94b15 commit 86e3c67
Show file tree
Hide file tree
Showing 22 changed files with 363 additions and 15 deletions.
19 changes: 15 additions & 4 deletions cloudbuild/cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ steps:
# 4b. Run integration tests concurrently with unit tests (DSv1, Scala 2.13)
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
id: 'integration-tests-2.13'
waitFor: ['integration-tests-2.12']
waitFor: ['unit-tests']
entrypoint: 'bash'
args: ['/workspace/cloudbuild/presubmit.sh', 'integrationtest-2.13']
env:
Expand All @@ -47,7 +47,7 @@ steps:
# 4c. Run integration tests concurrently with unit tests (DSv2, Spark 2.4)
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
id: 'integration-tests-2.4'
waitFor: ['unit-tests']
waitFor: ['integration-tests-2.12']
entrypoint: 'bash'
args: ['/workspace/cloudbuild/presubmit.sh', 'integrationtest-2.4']
env:
Expand All @@ -58,18 +58,29 @@ steps:
# 4d. Run integration tests concurrently with unit tests (DSv2, Spark 3.1)
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
id: 'integration-tests-3.1'
waitFor: ['integration-tests-2.4']
waitFor: ['integration-tests-2.13']
entrypoint: 'bash'
args: ['/workspace/cloudbuild/presubmit.sh', 'integrationtest-3.1']
env:
- 'GOOGLE_CLOUD_PROJECT=${_GOOGLE_CLOUD_PROJECT}'
- 'TEMPORARY_GCS_BUCKET=${_TEMPORARY_GCS_BUCKET}'
- 'BIGLAKE_CONNECTION_ID=${_BIGLAKE_CONNECTION_ID}'

# 4e. Run integration tests concurrently with unit tests (DSv2, Spark 3.2)
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
id: 'integration-tests-3.2'
waitFor: ['integration-tests-2.4']
entrypoint: 'bash'
args: ['/workspace/cloudbuild/presubmit.sh', 'integrationtest-3.2']
env:
- 'GOOGLE_CLOUD_PROJECT=${_GOOGLE_CLOUD_PROJECT}'
- 'TEMPORARY_GCS_BUCKET=${_TEMPORARY_GCS_BUCKET}'
- 'BIGLAKE_CONNECTION_ID=${_BIGLAKE_CONNECTION_ID}'

# 5. Upload coverage to CodeCov
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
id: 'upload-it-to-codecov'
waitFor: ['integration-tests-2.12','integration-tests-2.13','integration-tests-2.4','integration-tests-3.1']
waitFor: ['integration-tests-2.12','integration-tests-2.13','integration-tests-2.4','integration-tests-3.1','integration-tests-3.2']
entrypoint: 'bash'
args: ['/workspace/cloudbuild/presubmit.sh', 'upload-it-to-codecov']
env:
Expand Down
4 changes: 2 additions & 2 deletions cloudbuild/nightly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ $MVN install -DskipTests -Pdsv1,dsv2
#coverage report
$MVN test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1,dsv2
# Run integration tests
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv1,dsv2_2.4,dsv2_3.1
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv1,dsv2_2.4,dsv2_3.1,dsv2_3.2
# Run acceptance tests
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,acceptance,dsv1,dsv2_2.4,dsv2_3.1
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,acceptance,dsv1,dsv2_2.4,dsv2_3.1,dsv2_3.2
# Upload test coverage report to Codecov
bash <(curl -s https://codecov.io/bash) -K -F "nightly"

Expand Down
3 changes: 3 additions & 0 deletions cloudbuild/nightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ steps:
gsutil cp "/workspace/.repository/com/google/cloud/spark/spark-3.1-bigquery/$revision-preview/spark-3.1-bigquery-$revision-preview.jar" "gs://$bucket"
gsutil cp "gs://$bucket/spark-3.1-bigquery-$revision-preview.jar" "gs://$bucket/spark-3.1-bigquery-nightly-snapshot-preview.jar"
gsutil cp "/workspace/.repository/com/google/cloud/spark/spark-3.2-bigquery/$revision-preview/spark-3.2-bigquery-$revision-preview.jar" "gs://$bucket"
gsutil cp "gs://$bucket/spark-3.2-bigquery-$revision-preview.jar" "gs://$bucket/spark-3.2-bigquery-nightly-snapshot-preview.jar"
- name: 'gcr.io/cloud-builders/docker'
id: 'docker-push'
args: ['push', 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-nightly:latest']
Expand Down
9 changes: 7 additions & 2 deletions cloudbuild/presubmit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ case $STEP in
# Download maven and all the dependencies
init)
checkenv
$MVN install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_2.4,dsv2_3.1
$MVN install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_2.4,dsv2_3.1,dsv2_3.2
exit
;;

# Run unit tests
unittest)
$MVN test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_2.4,dsv2_3.1
$MVN test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_2.4,dsv2_3.1,dsv2_3.2
# Upload test coverage report to Codecov
bash <(curl -s https://codecov.io/bash) -K -F "${STEP}"
;;
Expand All @@ -64,6 +64,11 @@ case $STEP in
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv2_3.1
;;

# Run integration tests (DSv2, Spark 3.2)
integrationtest-3.2)
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv2_3.2
;;

upload-it-to-codecov)
checkenv
# Upload test coverage report to Codecov
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/*
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -15,11 +15,6 @@
*/
package com.google.cloud.spark.bigquery.v2;

public class Spark32BigQueryTableProvider extends Spark31BigQueryTableProvider {
public class Spark32BigQueryTableProvider extends Spark31BigQueryTableProvider {
// empty
}





Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.acceptance;

/**
 * Runs the BigNumeric acceptance suite on Dataproc Serverless using the {@code
 * spark-3.2-bigquery} connector artifact.
 *
 * <p>All test cases are inherited from {@link BigNumericDataprocServerlessAcceptanceTestBase};
 * this class only selects the connector under test.
 */
public class Spark32BigNumericDataprocServerlessAcceptanceTest
    extends BigNumericDataprocServerlessAcceptanceTestBase {

  public Spark32BigNumericDataprocServerlessAcceptanceTest() {
    // "spark-3.2-bigquery" is the connector artifact; "1.0" is presumably the Dataproc
    // Serverless runtime/batch version — TODO confirm semantics in the base class.
    super("spark-3.2-bigquery", "1.0");
  }

  // tests from superclass

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.acceptance;

import java.util.Collections;
import org.junit.AfterClass;
import org.junit.BeforeClass;

/**
 * Acceptance test running the shared {@link DataprocAcceptanceTestBase} suite on a Dataproc
 * image 2.1 (Debian 11) cluster with the {@code spark-3.2-bigquery} connector.
 */
public class Spark32DataprocImage21AcceptanceTest extends DataprocAcceptanceTestBase {

  // Shared per-class test context: created once in setup(), released in tearDown().
  private static AcceptanceTestContext context;

  public Spark32DataprocImage21AcceptanceTest() {
    // JUnit instantiates test classes after @BeforeClass has run, so context is set by then.
    // NOTE(review): the meaning of the boolean flag is defined in the base class — confirm.
    super(context, false);
  }

  @BeforeClass
  public static void setup() throws Exception {
    // Empty list: no extra cluster properties/initialization beyond the defaults.
    context =
        DataprocAcceptanceTestBase.setup(
            "2.1-debian11", "spark-3.2-bigquery", Collections.emptyList());
  }

  @AfterClass
  public static void tearDown() throws Exception {
    DataprocAcceptanceTestBase.tearDown(context);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.acceptance;

import org.junit.AfterClass;
import org.junit.BeforeClass;

/**
 * Acceptance test running the shared {@link DataprocAcceptanceTestBase} suite on a Dataproc
 * image 2.1 (Debian 11) cluster with the {@code spark-3.2-bigquery} connector, with Conscrypt
 * disabled via {@code DISABLE_CONSCRYPT_LIST} cluster settings.
 */
public class Spark32DataprocImage21DisableConscryptAcceptanceTest
    extends DataprocAcceptanceTestBase {

  // Shared per-class test context: created once in setup(), released in tearDown().
  private static AcceptanceTestContext context;

  public Spark32DataprocImage21DisableConscryptAcceptanceTest() {
    // JUnit instantiates test classes after @BeforeClass has run, so context is set by then.
    // NOTE(review): the meaning of the boolean flag is defined in the base class — confirm.
    super(context, false);
  }

  @BeforeClass
  public static void setup() throws Exception {
    context =
        DataprocAcceptanceTestBase.setup(
            "2.1-debian11", "spark-3.2-bigquery", DISABLE_CONSCRYPT_LIST);
  }

  @AfterClass
  public static void tearDown() throws Exception {
    DataprocAcceptanceTestBase.tearDown(context);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.acceptance;

/**
 * Runs the read-Shakespeare acceptance suite on Dataproc Serverless using the {@code
 * spark-3.2-bigquery} connector artifact.
 *
 * <p>NOTE(review): the "Sheakspeare" misspelling is inherited from the existing base class
 * name and is kept for consistency with it.
 */
public class Spark32ReadSheakspeareDataprocServerlessAcceptanceTest
    extends ReadSheakspeareDataprocServerlessAcceptanceTestBase {

  public Spark32ReadSheakspeareDataprocServerlessAcceptanceTest() {
    // "spark-3.2-bigquery" is the connector artifact; "1.0" is presumably the Dataproc
    // Serverless runtime/batch version — TODO confirm semantics in the base class.
    super("spark-3.2-bigquery", "1.0");
  }

  // tests from superclass

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.acceptance;

import org.junit.Ignore;

@Ignore // spark-3.2-bigquery does not support streaming yet
public class Spark32WriteStreamDataprocServerlessAcceptanceTest
extends WriteStreamDataprocServerlessAcceptanceTestBase {

public Spark32WriteStreamDataprocServerlessAcceptanceTest() {
super("spark-3.2-bigquery", "1.0");
}

// tests from superclass

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
 * Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.integration;

import com.google.cloud.spark.bigquery.SparkBigQueryConfig;

/**
 * Write integration tests for the Spark 3.2 connector using the DIRECT write method
 * ({@link SparkBigQueryConfig.WriteMethod#DIRECT}). All test cases are inherited from
 * {@link WriteIntegrationTestBase}.
 */
public class Spark32DirectWriteIntegrationTest extends WriteIntegrationTestBase {

  public Spark32DirectWriteIntegrationTest() {
    super(SparkBigQueryConfig.WriteMethod.DIRECT);
  }

  // tests from superclass
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
 * Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.integration;

import com.google.cloud.spark.bigquery.SparkBigQueryConfig;
import org.junit.Before;

/**
 * Write integration tests for the Spark 3.2 connector using the INDIRECT write method
 * ({@link SparkBigQueryConfig.WriteMethod#INDIRECT}). All test cases are inherited from
 * {@link WriteIntegrationTestBase}.
 */
public class Spark32IndirectWriteIntegrationTest extends WriteIntegrationTestBase {

  public Spark32IndirectWriteIntegrationTest() {
    super(SparkBigQueryConfig.WriteMethod.INDIRECT);
  }

  /**
   * Opts the intermediate Parquet load into list inference before each test.
   */
  @Before
  public void setParquetLoadBehaviour() {
    // TODO: make this the default value
    spark.conf().set("enableListInference", "true");
  }

  // tests from superclass

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spark.bigquery.integration;

import com.google.cloud.spark.bigquery.BigQueryConnectorUtils;
import org.junit.Before;

/**
 * Query pushdown integration tests for the Spark 3.2 connector. Enables pushdown on the
 * shared session before each test; the test cases themselves are inherited from
 * {@link QueryPushdownIntegrationTestBase}.
 */
public class Spark32QueryPushdownIntegrationTest extends QueryPushdownIntegrationTestBase {

  @Before
  public void before() {
    BigQueryConnectorUtils.enablePushdownSession(spark);
  }
}
Loading

0 comments on commit 86e3c67

Please sign in to comment.