Skip to content

Commit

Permalink
Record previous HEADs of named references (#7607)
Browse files Browse the repository at this point in the history
Persisted `Reference`s now keep a history of previous HEADs, limited in size and age; the defaults are up to 20 elements with a maximum age of 300 seconds.

Exposes a new Nessie-API V2 endpoint to retrieve the status of a reference. The returned information contains a consistency status of the HEAD commit, the recorded recent HEADs and, if requested, a combined status for the recent commits in the commit log.

This change also ensures that a "broken" reference can be re-assigned to another existing commit, and that it can be deleted.
  • Loading branch information
snazy authored Oct 13, 2023
1 parent d8eeef4 commit 6720656
Show file tree
Hide file tree
Showing 61 changed files with 3,065 additions and 67 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ as necessary. Empty sections will not end in the release notes.
- Ability to export repositories in V1 format. This is useful for migrating repositories to older
Nessie servers that do not support the new storage model.
- Added support for Spark 3.5, removed support for Spark 3.1 - along with the version bump of Apache Iceberg to 1.4.0.
- Functionality that records current-HEAD changes of named references and APIs to expose the information.
This is useful to recover from a scenario when a "primary data center/region/zone" has been lost and
replication of a distributed database has been interrupted.

### Changes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,13 @@ default DeleteNamespaceBuilder deleteNamespace() {
default UpdateNamespaceBuilder updateProperties() {
// Hands this API instance to a new ClientSideUpdateNamespace, which serves as the builder.
return new ClientSideUpdateNamespace(this);
}

/**
* Retrieve the recorded recent history of a reference.
*
* <p>A reference's history is a size and time limited record of changes of the reference's
* current pointer, aka HEAD. The size and time limits are configured in the Nessie server
* configuration.
*
* @return a builder used to configure and execute the reference-history request
*/
ReferenceHistoryBuilder referenceHistory();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.client.api;

import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import javax.validation.constraints.Pattern;
import org.projectnessie.error.NessieNotFoundException;
import org.projectnessie.model.ReferenceHistoryResponse;
import org.projectnessie.model.Validation;

/**
 * Request builder used to fetch the recorded recent history of a named reference.
 *
 * <p>Set the reference name via {@link #refName(String)}, optionally request a commit-log scan
 * via {@link #headCommitsToScan(Integer)}, then execute the request with {@link #get()}.
 */
public interface ReferenceHistoryBuilder {

  /**
   * Sets the name of the reference whose history is requested.
   *
   * @param refName reference name; must not be {@code null} and is constrained by {@link
   *     Validation#REF_NAME_REGEX}
   * @return a builder to continue configuring the request
   */
  ReferenceHistoryBuilder refName(
      @NotNull
          @jakarta.validation.constraints.NotNull
          @Pattern(regexp = Validation.REF_NAME_REGEX, message = Validation.REF_NAME_MESSAGE)
          @jakarta.validation.constraints.Pattern(
              regexp = Validation.REF_NAME_REGEX,
              message = Validation.REF_NAME_MESSAGE)
          String refName);

  /**
   * Sets the optional number of commits to scan, starting at the reference's HEAD.
   *
   * @param headCommitsToScan number of commits to scan, may be {@code null}
   * @return a builder to continue configuring the request
   */
  ReferenceHistoryBuilder headCommitsToScan(
      @Nullable @jakarta.annotation.Nullable Integer headCommitsToScan);

  /**
   * Executes the request.
   *
   * @return the reference history as reported by the server
   * @throws NessieNotFoundException presumably when the named reference does not exist (TODO
   *     confirm against the server implementation)
   */
  ReferenceHistoryResponse get() throws NessieNotFoundException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.projectnessie.client.api.GetRepositoryConfigBuilder;
import org.projectnessie.client.api.MergeReferenceBuilder;
import org.projectnessie.client.api.NessieApiV2;
import org.projectnessie.client.api.ReferenceHistoryBuilder;
import org.projectnessie.client.api.TransplantCommitsBuilder;
import org.projectnessie.client.api.UpdateRepositoryConfigBuilder;
import org.projectnessie.client.http.HttpClient;
Expand Down Expand Up @@ -89,6 +90,11 @@ public GetReferenceBuilder getReference() {
return new HttpGetReference(client);
}

@Override
public ReferenceHistoryBuilder referenceHistory() {
// Every call creates a new HttpReferenceHistory builder around this instance's `client`.
return new HttpReferenceHistory(client);
}

@Override
public GetEntriesBuilder getEntries() {
return new HttpGetEntries(client);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright (C) 2020 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.client.rest.v2;

import org.projectnessie.client.api.ReferenceHistoryBuilder;
import org.projectnessie.client.http.HttpClient;
import org.projectnessie.error.NessieNotFoundException;
import org.projectnessie.model.ReferenceHistoryResponse;

/**
 * HTTP-backed {@link ReferenceHistoryBuilder}.
 *
 * <p>Collects the reference name and the optional number of commits to scan, then issues a GET
 * request against the {@code trees/{ref}/recent-changes} path when {@link #get()} is called.
 */
final class HttpReferenceHistory implements ReferenceHistoryBuilder {

  /** Client used to create the outgoing request. */
  private final HttpClient client;

  /** Value substituted for the {@code ref} path template. */
  private String refName;

  /** Value sent as the {@code scan-commits} query parameter, may be {@code null}. */
  private Integer headCommitsToScan;

  HttpReferenceHistory(HttpClient httpClient) {
    this.client = httpClient;
  }

  @Override
  public ReferenceHistoryBuilder refName(String refName) {
    this.refName = refName;
    return this;
  }

  @Override
  public ReferenceHistoryBuilder headCommitsToScan(Integer headCommitsToScan) {
    this.headCommitsToScan = headCommitsToScan;
    return this;
  }

  /**
   * Sends the request and deserializes the response body.
   *
   * @return the response entity read as {@link ReferenceHistoryResponse}
   * @throws NessieNotFoundException unwrapped from the HTTP layer via {@code unwrap(...)}
   */
  @Override
  public ReferenceHistoryResponse get() throws NessieNotFoundException {
    return this.client
        .newRequest()
        .path("trees/{ref}/recent-changes")
        .queryParam("scan-commits", this.headCommitsToScan)
        .resolveTemplate("ref", this.refName)
        .unwrap(NessieNotFoundException.class)
        .get()
        .readEntity(ReferenceHistoryResponse.class);
  }
}
14 changes: 14 additions & 0 deletions api/model/src/main/java/org/projectnessie/api/v2/TreeApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.projectnessie.api.v2.params.EntriesParams;
import org.projectnessie.api.v2.params.GetReferenceParams;
import org.projectnessie.api.v2.params.Merge;
import org.projectnessie.api.v2.params.ReferenceHistoryParams;
import org.projectnessie.api.v2.params.ReferencesParams;
import org.projectnessie.api.v2.params.Transplant;
import org.projectnessie.error.NessieConflictException;
Expand All @@ -41,6 +42,7 @@
import org.projectnessie.model.MergeResponse;
import org.projectnessie.model.Operations;
import org.projectnessie.model.Reference;
import org.projectnessie.model.ReferenceHistoryResponse;
import org.projectnessie.model.ReferencesResponse;
import org.projectnessie.model.SingleReferenceResponse;
import org.projectnessie.model.Validation;
Expand Down Expand Up @@ -109,6 +111,18 @@ SingleReferenceResponse getReferenceByName(
GetReferenceParams params)
throws NessieNotFoundException;

/**
* Retrieve the recorded recent history of a reference.
*
* <p>A reference's history is a size and time limited record of changes of the reference's
* current pointer, aka HEAD. The size and time limits are configured in the Nessie server
* configuration.
*
* @param params request parameters, must not be {@code null}; see {@link ReferenceHistoryParams}
* @return the recorded history of the requested reference
* @throws NessieNotFoundException presumably when the requested reference does not exist (TODO
*     confirm against the implementation)
*/
ReferenceHistoryResponse getReferenceHistory(
@Valid @jakarta.validation.Valid @NotNull @jakarta.validation.constraints.NotNull
ReferenceHistoryParams params)
throws NessieNotFoundException;

/**
* Retrieve objects for a ref, potentially truncated by the backend.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.projectnessie.api.v2.params.EntriesParams;
import org.projectnessie.api.v2.params.GetReferenceParams;
import org.projectnessie.api.v2.params.Merge;
import org.projectnessie.api.v2.params.ReferenceHistoryParams;
import org.projectnessie.api.v2.params.ReferencesParams;
import org.projectnessie.api.v2.params.Transplant;
import org.projectnessie.error.NessieConflictException;
Expand All @@ -70,6 +71,7 @@
import org.projectnessie.model.MergeResponse;
import org.projectnessie.model.Operations;
import org.projectnessie.model.Reference;
import org.projectnessie.model.ReferenceHistoryResponse;
import org.projectnessie.model.ReferencesResponse;
import org.projectnessie.model.SingleReferenceResponse;
import org.projectnessie.model.Validation;
Expand Down Expand Up @@ -194,6 +196,48 @@ SingleReferenceResponse createReference(
SingleReferenceResponse getReferenceByName(
@BeanParam @jakarta.ws.rs.BeanParam GetReferenceParams params) throws NessieNotFoundException;

// REST binding of getReferenceHistory: a GET on "{ref}/recent-changes" that produces JSON and is
// serialized using the V2 JSON view. Annotations are duplicated for the javax and jakarta
// namespaces.
@GET
@jakarta.ws.rs.GET
@Produces(MediaType.APPLICATION_JSON)
@jakarta.ws.rs.Produces(jakarta.ws.rs.core.MediaType.APPLICATION_JSON)
@Path("{ref:" + REF_NAME_PATH_ELEMENT_REGEX + "}/recent-changes")
@jakarta.ws.rs.Path("{ref:" + REF_NAME_PATH_ELEMENT_REGEX + "}/recent-changes")
@Operation(
summary = "Fetch recent pointer changes of a reference",
operationId = "getReferenceHistory",
description =
"Retrieve the recorded recent history of a reference.\n"
+ "\n"
+ "A reference's history is a size and time limited record of changes of the reference's "
+ "current pointer, aka HEAD. The size and time limits are configured in the Nessie server "
+ "configuration.\n"
+ "\n"
+ "This function is only useful for deployments using replicating multi-zone/region database "
+ "setups. If the \"primary write target\" fails and cannot be recovered, replicas might not "
+ "have all written records (data loss scenario). This function helps identifying whether "
+ "the commits of a reference that were written within the configured \"replication lag\" are "
+ "present and consistent. A reference might then be deleted or re-assigned to a consistent commit.")
@APIResponses({
@APIResponse(
responseCode = "200",
description = "Found and returned reference.",
content = {
@Content(
mediaType = MediaType.APPLICATION_JSON,
examples = {@ExampleObject(ref = "referenceHistoryResponse")},
schema = @Schema(implementation = ReferenceHistoryResponse.class))
}),
@APIResponse(responseCode = "400", description = "Invalid input, ref name not valid"),
@APIResponse(responseCode = "401", description = "Invalid credentials provided"),
@APIResponse(responseCode = "403", description = "Not allowed to view the given reference"),
@APIResponse(responseCode = "404", description = "Reference not found")
})
@JsonView(Views.V2.class)
@Override
ReferenceHistoryResponse getReferenceHistory(
@BeanParam @jakarta.ws.rs.BeanParam ReferenceHistoryParams params)
throws NessieNotFoundException;

@Override
@GET
@jakarta.ws.rs.GET
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.api.v2.params;

import static org.projectnessie.api.v2.doc.ApiDoc.REF_GET_PARAMETER_DESCRIPTION;

import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import javax.validation.constraints.Pattern;
import javax.ws.rs.PathParam;
import javax.ws.rs.QueryParam;
import org.eclipse.microprofile.openapi.annotations.media.ExampleObject;
import org.eclipse.microprofile.openapi.annotations.parameters.Parameter;
import org.immutables.builder.Builder.Constructor;
import org.projectnessie.model.Validation;

/**
 * Parameter holder for the reference-history request.
 *
 * <p>Carries the {@code ref} path parameter and the optional {@code scan-commits} query
 * parameter. Fields are annotated for both the javax and jakarta namespaces.
 */
public class ReferenceHistoryParams {

  /** Reference name taken from the {@code ref} path parameter. */
  @Parameter(
      description = REF_GET_PARAMETER_DESCRIPTION,
      examples = {@ExampleObject(ref = "ref"), @ExampleObject(ref = "refDefault")})
  @PathParam("ref")
  @jakarta.ws.rs.PathParam("ref")
  @NotNull
  @jakarta.validation.constraints.NotNull
  @Pattern(regexp = Validation.REF_NAME_PATH_REGEX, message = Validation.REF_NAME_MESSAGE)
  @jakarta.validation.constraints.Pattern(
      regexp = Validation.REF_NAME_PATH_REGEX,
      message = Validation.REF_NAME_MESSAGE)
  private String ref;

  /** Optional commit count taken from the {@code scan-commits} query parameter. */
  @Parameter(
      description =
          "Optional parameter, specifies the number of commits to scan from the reference's current HEAD, "
              + "limited to the given amount of commits. Default is to not scan the commit log. The server "
              + "may impose a hard limit on the amount of commits from the commit log.")
  @QueryParam("scan-commits")
  @jakarta.ws.rs.QueryParam("scan-commits")
  @Nullable
  @jakarta.annotation.Nullable
  private Integer headCommitsToScan;

  /** No-arg constructor; leaves both fields unset. */
  public ReferenceHistoryParams() {}

  /**
   * Creates an instance with both values supplied directly.
   *
   * @param ref reference name, must not be {@code null}
   * @param headCommitsToScan number of commits to scan, may be {@code null}
   */
  @Constructor
  ReferenceHistoryParams(
      @NotNull @jakarta.validation.constraints.NotNull String ref,
      @Nullable @jakarta.annotation.Nullable Integer headCommitsToScan) {
    this.ref = ref;
    this.headCommitsToScan = headCommitsToScan;
  }

  /**
   * Returns the reference name.
   *
   * @return the value of the {@code ref} field
   */
  public String getRef() {
    return this.ref;
  }

  /**
   * Returns the requested number of commits to scan.
   *
   * @return the value of the {@code headCommitsToScan} field, possibly {@code null}
   */
  @Nullable
  @jakarta.annotation.Nullable
  public Integer headCommitsToScan() {
    return this.headCommitsToScan;
  }

  /**
   * Creates a new builder for this parameter type.
   *
   * @return a fresh {@code ReferenceHistoryParamsBuilder} (presumably generated from the
   *     {@code @Constructor}-annotated constructor; verify against the build setup)
   */
  public static ReferenceHistoryParamsBuilder builder() {
    return new ReferenceHistoryParamsBuilder();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.model;

/**
 * Consistency state of a commit.
 *
 * <p>NOTE: the declaration order of the values represents the "health" of a commit, from best to
 * worst. Do not reorder the constants.
 */
public enum CommitConsistency {

  /** Consistency was not checked. */
  NOT_CHECKED,

  /** The commit object, its index information and all reachable content is present. */
  COMMIT_CONSISTENT,

  /**
   * The commit object is present and its index is accessible, but some content reachable from the
   * commit is not present.
   */
  COMMIT_CONTENT_INCONSISTENT,

  /**
   * The commit cannot be accessed at all, for example because the commit object itself or its
   * index information is missing.
   */
  COMMIT_INCONSISTENT
}
Loading

0 comments on commit 6720656

Please sign in to comment.