-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
1. During upgrade/downgrade, no user checkpoint override via streamer config. 2. When checkpoint is set via streamer config for hoodie incremental source, it will specify request time based or completion time based. Added relevant checkpoint handling logic accordingly.
- Loading branch information
1 parent
5a13ce1
commit 0a0f3bb
Showing
18 changed files
with
1,102 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
...main/java/org/apache/hudi/common/table/checkpoint/HoodieIncrSourceCheckpointValUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.hudi.common.table.checkpoint; | ||
|
||
/** | ||
* Utility class providing methods to check if a string starts with specific resume-related prefixes. | ||
*/ | ||
public class HoodieIncrSourceCheckpointValUtils { | ||
public static final String RESET_CHECKPOINT_V2_SEPARATOR = ":"; | ||
public static final String REQUEST_TIME_PREFIX = "resumeFromInstantRequestTime"; | ||
public static final String COMPLETION_TIME_PREFIX = "resumeFromInstantCompletionTime"; | ||
|
||
/** | ||
* For hoodie incremental source ingestion, if the target table is version 8 or higher, the checkpoint | ||
* key set by streamer config can be in either of the following format: | ||
* - resumeFromInstantRequestTime:[checkpoint value based on request time] | ||
* - resumeFromInstantCompletionTime:[checkpoint value based on completion time] | ||
* | ||
* StreamerCheckpointV2FromCfgCkp class itself captured the fact that this is version 8 and higher, plus | ||
* the checkpoint source is from streamer config override. | ||
* | ||
* When the checkpoint is consumed by individual data sources, we need to convert them to either vanilla | ||
* checkpoint v1 (request time based) or checkpoint v2 (completion time based). | ||
*/ | ||
public static Checkpoint resolveToActualCheckpointVersion(UnresolvedStreamerCheckpointBasedOnCfg checkpoint) { | ||
String[] parts = extractKeyValues(checkpoint); | ||
switch (parts[0]) { | ||
case REQUEST_TIME_PREFIX: { | ||
return new StreamerCheckpointV1(checkpoint).setCheckpointKey(parts[1]); | ||
} | ||
case COMPLETION_TIME_PREFIX: { | ||
return new StreamerCheckpointV2(checkpoint).setCheckpointKey(parts[1]); | ||
} | ||
default: | ||
throw new IllegalArgumentException("Unknown event ordering mode " + parts[0]); | ||
} | ||
} | ||
|
||
private static String [] extractKeyValues(UnresolvedStreamerCheckpointBasedOnCfg checkpoint) { | ||
String checkpointKey = checkpoint.getCheckpointKey(); | ||
String[] parts = checkpointKey.split(RESET_CHECKPOINT_V2_SEPARATOR); | ||
if (parts.length != 2 | ||
|| ( | ||
!parts[0].trim().equals(REQUEST_TIME_PREFIX) | ||
&& !parts[0].trim().equals(COMPLETION_TIME_PREFIX) | ||
)) { | ||
throw new IllegalArgumentException( | ||
"Illegal checkpoint key override `" + checkpointKey + "`. Valid format is either `resumeFromInstantRequestTime:<checkpoint value>` or " | ||
+ "`resumeFromInstantCompletionTime:<checkpoint value>`."); | ||
} | ||
parts[0] = parts[0].trim(); | ||
parts[1] = parts[1].trim(); | ||
return parts; | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
.../java/org/apache/hudi/common/table/checkpoint/UnresolvedStreamerCheckpointBasedOnCfg.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.hudi.common.table.checkpoint; | ||
|
||
import org.apache.hudi.common.model.HoodieCommitMetadata; | ||
|
||
/** | ||
* A special checkpoint v2 class that indicates its checkpoint key comes from streamer config checkpoint | ||
* overrides. | ||
* | ||
* For hoodie incremental source, based on the content of the checkpoint override value, it can indicate | ||
* either request time based checkpoint or completion time based. So the class serves as an indicator to | ||
* data sources of interest that it needs to be further parsed and resolved to either checkpoint v1 or v2. | ||
* | ||
* For all the other data sources, it behaves exactly the same as checkpoint v2. | ||
* | ||
* To keep the checkpoint class design ignorant of which data source it serves, the class only indicates where | ||
* the checkpoint key comes from. | ||
* */ | ||
public class UnresolvedStreamerCheckpointBasedOnCfg extends StreamerCheckpointV2 { | ||
public UnresolvedStreamerCheckpointBasedOnCfg(String key) { | ||
super(key); | ||
} | ||
|
||
public UnresolvedStreamerCheckpointBasedOnCfg(Checkpoint checkpoint) { | ||
super(checkpoint); | ||
} | ||
|
||
public UnresolvedStreamerCheckpointBasedOnCfg(HoodieCommitMetadata commitMetadata) { | ||
super(commitMetadata); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
.../java/org/apache/hudi/common/table/checkpoint/TestHoodieIncrSourceCheckpointValUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.hudi.common.table.checkpoint; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertInstanceOf; | ||
import static org.junit.jupiter.api.Assertions.assertThrows; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
public class TestHoodieIncrSourceCheckpointValUtils { | ||
|
||
@Test | ||
public void testResolveToV1V2CheckpointWithRequestTime() { | ||
String checkpoint = "20240301"; | ||
UnresolvedStreamerCheckpointBasedOnCfg mockCheckpoint = mock(UnresolvedStreamerCheckpointBasedOnCfg.class); | ||
when(mockCheckpoint.getCheckpointKey()).thenReturn("resumeFromInstantRequestTime:" + checkpoint); | ||
|
||
Checkpoint result = HoodieIncrSourceCheckpointValUtils.resolveToActualCheckpointVersion(mockCheckpoint); | ||
|
||
assertInstanceOf(StreamerCheckpointV1.class, result); | ||
assertEquals(checkpoint, result.getCheckpointKey()); | ||
} | ||
|
||
@Test | ||
public void testResolveToV1V2CheckpointWithCompletionTime() { | ||
String checkpoint = "20240302"; | ||
UnresolvedStreamerCheckpointBasedOnCfg mockCheckpoint = mock(UnresolvedStreamerCheckpointBasedOnCfg.class); | ||
when(mockCheckpoint.getCheckpointKey()).thenReturn("resumeFromInstantCompletionTime:" + checkpoint); | ||
|
||
Checkpoint result = HoodieIncrSourceCheckpointValUtils.resolveToActualCheckpointVersion(mockCheckpoint); | ||
|
||
assertInstanceOf(StreamerCheckpointV2.class, result); | ||
assertEquals(checkpoint, result.getCheckpointKey()); | ||
} | ||
|
||
@Test | ||
public void testResolveToV1V2CheckpointWithInvalidPrefix() { | ||
UnresolvedStreamerCheckpointBasedOnCfg mockCheckpoint = mock(UnresolvedStreamerCheckpointBasedOnCfg.class); | ||
when(mockCheckpoint.getCheckpointKey()).thenReturn("invalidPrefix:20240303"); | ||
|
||
IllegalArgumentException exception = assertThrows( | ||
IllegalArgumentException.class, | ||
() -> HoodieIncrSourceCheckpointValUtils.resolveToActualCheckpointVersion(mockCheckpoint) | ||
); | ||
assertTrue(exception.getMessage().contains("Illegal checkpoint key override")); | ||
} | ||
|
||
@Test | ||
public void testResolveToV1V2CheckpointWithMalformedInput() { | ||
UnresolvedStreamerCheckpointBasedOnCfg mockCheckpoint = mock(UnresolvedStreamerCheckpointBasedOnCfg.class); | ||
when(mockCheckpoint.getCheckpointKey()).thenReturn("malformedInput"); | ||
|
||
IllegalArgumentException exception = assertThrows( | ||
IllegalArgumentException.class, | ||
() -> HoodieIncrSourceCheckpointValUtils.resolveToActualCheckpointVersion(mockCheckpoint) | ||
); | ||
assertTrue(exception.getMessage().contains("Illegal checkpoint key override")); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.