-
Notifications
You must be signed in to change notification settings - Fork 3.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add api for Retrieving unused segments #15415
Changes from 1 commit
1d135e7
c2d52f2
190a379
49cbe5a
49c1c1a
bf15374
90362c3
af81f10
3950732
002f00e
5ada621
c82ab4a
68a98dd
6675228
82259c6
c92aaf1
817d8be
0ea6b5a
ff0e5c5
a23b044
d6685a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -687,7 +687,7 @@ private int doMarkAsUsedNonOvershadowedSegments(String dataSourceName, @Nullable | |||||
} | ||||||
|
||||||
try (final CloseableIterator<DataSegment> iterator = | ||||||
queryTool.retrieveUnusedSegments(dataSourceName, intervals, null, null)) { | ||||||
queryTool.retrieveUnusedSegments(dataSourceName, intervals, null, null, null)) { | ||||||
while (iterator.hasNext()) { | ||||||
final DataSegment dataSegment = iterator.next(); | ||||||
timeline.addSegments(Iterators.singletonIterator(dataSegment)); | ||||||
|
@@ -956,12 +956,30 @@ public Optional<Iterable<DataSegment>> iterateAllUsedNonOvershadowedSegmentsForD | |||||
.transform(timeline -> timeline.findNonOvershadowedObjectsInInterval(interval, Partitions.ONLY_COMPLETE)); | ||||||
} | ||||||
|
||||||
/** | ||||||
* Retrieves segments for a given datasource that are marked unused and that are *fully contained by* the given | ||||||
* interval. If no interval is given, this method will retrieve all | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm, copy-pasta here and below? This method only takes a single interval, right? (not a collection) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. fixed |
||||||
* unused segments. | ||||||
* | ||||||
* This call does not return any information about realtime segments. | ||||||
* | ||||||
* @param datasource The name of the datasource | ||||||
* @param interval The interval to search over | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||||||
* @param limit The limit of segments to return | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||||||
* @param offset The offset to use when retrieving matching segments. | ||||||
* @param orderByStartEnd Specifies the order with which to return the matching segments by start time, end time. A | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. An enum type would be a better choice for the direction of order by? Similar to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! Added new enum |
||||||
* value less than or equal to 0 specifies a descending order, while a value greater | ||||||
* than 0 specifies an ascending order. A null value indicates that order does not matter. | ||||||
|
||||||
* Returns an iterable. | ||||||
*/ | ||||||
@Override | ||||||
public Iterable<DataSegment> iterateAllUnusedSegmentsForDatasource( | ||||||
String datasource, | ||||||
@Nullable Interval interval, | ||||||
@Nullable Integer limit, | ||||||
@Nullable Integer offset | ||||||
final String datasource, | ||||||
@Nullable final Interval interval, | ||||||
@Nullable final Integer limit, | ||||||
@Nullable final Integer offset, | ||||||
@Nullable final Integer orderByStartEnd | ||||||
) | ||||||
{ | ||||||
return connector.inReadOnlyTransaction( | ||||||
|
@@ -974,7 +992,7 @@ public Iterable<DataSegment> iterateAllUnusedSegmentsForDatasource( | |||||
? Intervals.ONLY_ETERNITY | ||||||
: Collections.singletonList(interval); | ||||||
try (final CloseableIterator<DataSegment> iterator = | ||||||
queryTool.retrieveUnusedSegments(datasource, intervals, limit, offset)) { | ||||||
queryTool.retrieveUnusedSegments(datasource, intervals, limit, offset, orderByStartEnd)) { | ||||||
return ImmutableList.copyOf(iterator); | ||||||
} | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1209,6 +1209,7 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndNoLimit() throws | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()), | ||
null, | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(segments.size(), actualUnusedSegments.size()); | ||
|
@@ -1224,6 +1225,7 @@ public void testRetrieveUnusedSegmentsUsingNoIntervalsNoLimitNoOffset() throws I | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList.of(), | ||
null, | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(segments.size(), actualUnusedSegments.size()); | ||
|
@@ -1237,16 +1239,38 @@ public void testRetrieveUnusedSegmentsUsingNoIntervalsAndNoLimitAndOffset() thro | |
markAllSegmentsUnused(new HashSet<>(segments)); | ||
|
||
int offset = 10; | ||
final List<DataSegment> expectedSegments = segments.stream() | ||
final List<DataSegment> expectedSegmentsAscOrder = segments.stream() | ||
.skip(offset) | ||
.collect(Collectors.toList()); | ||
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList.of(), | ||
null, | ||
offset, | ||
null | ||
); | ||
Assert.assertEquals(expectedSegmentsAscOrder.size(), actualUnusedSegments.size()); | ||
Assert.assertTrue(expectedSegmentsAscOrder.containsAll(actualUnusedSegments)); | ||
|
||
actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList.of(), | ||
null, | ||
offset | ||
offset, | ||
1 | ||
); | ||
Assert.assertEquals(expectedSegments.size(), actualUnusedSegments.size()); | ||
Assert.assertTrue(expectedSegments.containsAll(actualUnusedSegments)); | ||
Assert.assertEquals(expectedSegmentsAscOrder.size(), actualUnusedSegments.size()); | ||
Assert.assertTrue(expectedSegmentsAscOrder.containsAll(actualUnusedSegments)); | ||
|
||
final List<DataSegment> expectedSegmentsDescOrder = new ArrayList<>(expectedSegmentsAscOrder); | ||
Collections.reverse(expectedSegmentsDescOrder); | ||
|
||
actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList.of(), | ||
null, | ||
offset, | ||
-1 | ||
); | ||
Assert.assertEquals(expectedSegmentsDescOrder.size(), actualUnusedSegments.size()); | ||
Assert.assertTrue(expectedSegmentsDescOrder.containsAll(actualUnusedSegments)); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For test coverage, can we also please include new or extend existing tests for the following scenarios:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. added |
||
@Test | ||
|
@@ -1258,6 +1282,7 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitAtRange() th | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()), | ||
segments.size(), | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(segments.size(), actualUnusedSegments.size()); | ||
|
@@ -1274,6 +1299,7 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitInRange() th | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().limit(requestedLimit).map(DataSegment::getInterval).collect(Collectors.toList()), | ||
requestedLimit, | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(requestedLimit, actualUnusedSegments.size()); | ||
|
@@ -1291,7 +1317,8 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsInSingleBatchLimitAn | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().limit(requestedLimit).map(DataSegment::getInterval).collect(Collectors.toList()), | ||
requestedLimit, | ||
offset | ||
offset, | ||
null | ||
); | ||
Assert.assertEquals(segments.size() - offset, actualUnusedSegments.size()); | ||
// offset used when number of intervals does not require multiple batches | ||
|
@@ -1309,7 +1336,8 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffsetInRang | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().limit(requestedLimit).map(DataSegment::getInterval).collect(Collectors.toList()), | ||
requestedLimit, | ||
offset | ||
offset, | ||
null | ||
); | ||
Assert.assertEquals(requestedLimit, actualUnusedSegments.size()); | ||
// offset not used when number of intervals requires multiple batches | ||
|
@@ -1325,6 +1353,7 @@ public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitOutOfRange() | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()), | ||
segments.size() + 1, | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(segments.size(), actualUnusedSegments.size()); | ||
|
@@ -1344,6 +1373,7 @@ public void testRetrieveUnusedSegmentsUsingIntervalOutOfRange() throws IOExcepti | |
final ImmutableList<DataSegment> actualUnusedSegments = retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
ImmutableList.of(outOfRangeInterval), | ||
null, | ||
null, | ||
null | ||
); | ||
Assert.assertEquals(0, actualUnusedSegments.size()); | ||
|
@@ -3126,7 +3156,8 @@ private List<DataSegment> createAndGetUsedYearSegments(final int startYear, fina | |
private ImmutableList<DataSegment> retrieveUnusedSegmentsUsingMultipleIntervalsLimitAndOffset( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's no offset anymore; maybe we can simply call this method |
||
final List<Interval> intervals, | ||
final Integer limit, | ||
final Integer offset | ||
final Integer offset, | ||
final Integer orderByStartEnd | ||
) | ||
{ | ||
return derbyConnector.inReadOnlyTransaction( | ||
|
@@ -3138,7 +3169,7 @@ private ImmutableList<DataSegment> retrieveUnusedSegmentsUsingMultipleIntervalsL | |
derbyConnectorRule.metadataTablesConfigSupplier().get(), | ||
mapper | ||
) | ||
.retrieveUnusedSegments(DS.WIKI, intervals, limit, offset)) { | ||
.retrieveUnusedSegments(DS.WIKI, intervals, limit, offset, orderByStartEnd)) { | ||
return ImmutableList.copyOf(iterator); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be a good idea to go ahead and add a sort order now too
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Good suggestion. Added