Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: update API to 0.25 & separate QueryCursor class #79

Merged
merged 2 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions scripts/jextract.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,44 @@ $package = 'io.github.treesitter.jtreesitter.internal'
$output = "$($args[1])/generated-sources/jextract"
$lib = "$($args[0])/core/lib"

& jextract.bat `
& jextract.ps1 `
--include-struct TSInput `
--include-struct TSInputEdit `
--include-struct TSLogger `
--include-struct TSNode `
--include-struct TSParseOptions `
--include-struct TSParseState `
--include-struct TSPoint `
--include-struct TSQueryCapture `
--include-struct TSQueryCursorOptions `
--include-struct TSQueryCursorState `
--include-struct TSQueryMatch `
--include-struct TSQueryPredicateStep `
--include-struct TSQueryPredicateStepType `
--include-struct TSLanguageMetadata `
--include-struct TSRange `
--include-struct TSTreeCursor `
--include-function free `
--include-function malloc `
--include-function calloc `
--include-function realloc `
--include-function ts_set_allocator `
--include-function ts_language_abi_version `
--include-function ts_language_copy `
--include-function ts_language_delete `
--include-function ts_language_field_count `
--include-function ts_language_field_id_for_name `
--include-function ts_language_field_name_for_id `
--include-function ts_language_metadata `
--include-function ts_language_name `
--include-function ts_language_next_state `
--include-function ts_language_state_count `
--include-function ts_language_subtypes `
--include-function ts_language_supertypes `
--include-function ts_language_symbol_count `
--include-function ts_language_symbol_for_name `
--include-function ts_language_symbol_name `
--include-function ts_language_symbol_type `
--include-function ts_language_version `
--include-function ts_lookahead_iterator_current_symbol `
--include-function ts_lookahead_iterator_current_symbol_name `
--include-function ts_lookahead_iterator_delete `
Expand Down Expand Up @@ -90,6 +99,7 @@ $lib = "$($args[0])/core/lib"
--include-function ts_parser_parse `
--include-function ts_parser_parse_string `
--include-function ts_parser_parse_string_encoding `
--include-function ts_parser_parse_with_options `
--include-function ts_parser_print_dot_graphs `
--include-function ts_parser_reset `
--include-function ts_parser_set_cancellation_flag `
Expand All @@ -104,6 +114,7 @@ $lib = "$($args[0])/core/lib"
--include-function ts_query_cursor_delete `
--include-function ts_query_cursor_did_exceed_match_limit `
--include-function ts_query_cursor_exec `
--include-function ts_query_cursor_exec_with_options `
--include-function ts_query_cursor_match_limit `
--include-function ts_query_cursor_new `
--include-function ts_query_cursor_next_capture `
Expand Down Expand Up @@ -157,7 +168,9 @@ $lib = "$($args[0])/core/lib"
--include-function ts_tree_root_node_with_offset `
--include-constant TREE_SITTER_LANGUAGE_VERSION `
--include-constant TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION `
--include-constant TSInputEncodingUTF16 `
--include-constant TSInputEncodingCustom `
--include-constant TSInputEncodingUTF16BE `
--include-constant TSInputEncodingUTF16LE `
--include-constant TSInputEncodingUTF8 `
--include-constant TSLogTypeLex `
--include-constant TSLogTypeParse `
Expand All @@ -180,6 +193,7 @@ $lib = "$($args[0])/core/lib"
--include-constant TSSymbolTypeAuxiliary `
--include-constant TSSymbolTypeRegular `
--include-constant TSSymbolTypeSupertype `
--include-typedef DecodeFunction `
--header-class-name TreeSitter `
--output $output `
-t $package `
Expand Down
18 changes: 16 additions & 2 deletions scripts/jextract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,39 @@ exec jextract \
--include-struct TSInputEdit \
--include-struct TSLogger \
--include-struct TSNode \
--include-struct TSParseOptions \
--include-struct TSParseState \
--include-struct TSPoint \
--include-struct TSQueryCapture \
--include-struct TSQueryCursorOptions \
--include-struct TSQueryCursorState \
--include-struct TSQueryMatch \
--include-struct TSQueryPredicateStep \
--include-struct TSQueryPredicateStepType \
--include-struct TSLanguageMetadata \
--include-struct TSRange \
--include-struct TSTreeCursor \
--include-function free \
--include-function malloc \
--include-function calloc \
--include-function realloc \
--include-function ts_set_allocator \
--include-function ts_language_abi_version \
--include-function ts_language_copy \
--include-function ts_language_delete \
--include-function ts_language_field_count \
--include-function ts_language_field_id_for_name \
--include-function ts_language_field_name_for_id \
--include-function ts_language_metadata \
--include-function ts_language_name \
--include-function ts_language_next_state \
--include-function ts_language_state_count \
--include-function ts_language_subtypes \
--include-function ts_language_supertypes \
--include-function ts_language_symbol_count \
--include-function ts_language_symbol_for_name \
--include-function ts_language_symbol_name \
--include-function ts_language_symbol_type \
--include-function ts_language_version \
--include-function ts_lookahead_iterator_current_symbol \
--include-function ts_lookahead_iterator_current_symbol_name \
--include-function ts_lookahead_iterator_delete \
Expand Down Expand Up @@ -92,6 +101,7 @@ exec jextract \
--include-function ts_parser_parse \
--include-function ts_parser_parse_string \
--include-function ts_parser_parse_string_encoding \
--include-function ts_parser_parse_with_options \
--include-function ts_parser_print_dot_graphs \
--include-function ts_parser_reset \
--include-function ts_parser_set_cancellation_flag \
Expand All @@ -106,6 +116,7 @@ exec jextract \
--include-function ts_query_cursor_delete \
--include-function ts_query_cursor_did_exceed_match_limit \
--include-function ts_query_cursor_exec \
--include-function ts_query_cursor_exec_with_options \
--include-function ts_query_cursor_match_limit \
--include-function ts_query_cursor_new \
--include-function ts_query_cursor_next_capture \
Expand Down Expand Up @@ -159,7 +170,9 @@ exec jextract \
--include-function ts_tree_root_node_with_offset \
--include-constant TREE_SITTER_LANGUAGE_VERSION \
--include-constant TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION \
--include-constant TSInputEncodingUTF16 \
--include-constant TSInputEncodingCustom \
--include-constant TSInputEncodingUTF16BE \
--include-constant TSInputEncodingUTF16LE \
--include-constant TSInputEncodingUTF8 \
--include-constant TSLogTypeLex \
--include-constant TSLogTypeParse \
Expand All @@ -182,6 +195,7 @@ exec jextract \
--include-constant TSSymbolTypeAuxiliary \
--include-constant TSSymbolTypeRegular \
--include-constant TSSymbolTypeSupertype \
--include-typedef DecodeFunction \
--header-class-name TreeSitter \
--output "$output" \
-t "$package" \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package io.github.treesitter.jtreesitter;

import static io.github.treesitter.jtreesitter.internal.TreeSitter.C_INT;
import static io.github.treesitter.jtreesitter.internal.TreeSitter.ts_query_cursor_next_capture;

import io.github.treesitter.jtreesitter.internal.TSQueryMatch;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SegmentAllocator;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.BiPredicate;
import java.util.function.Consumer;
import org.jspecify.annotations.NullMarked;
import org.jspecify.annotations.Nullable;

/**
 * Spliterator that walks the captures produced by a native query cursor.
 *
 * <p>Each element pairs the capture index written by
 * {@code ts_query_cursor_next_capture} with the {@link QueryMatch} it belongs to.
 * The spliterator reports an unknown size ({@link Long#MAX_VALUE}) and the
 * {@link Spliterator#IMMUTABLE} and {@link Spliterator#NONNULL} characteristics.
 */
@NullMarked
class CapturesIterator extends Spliterators.AbstractSpliterator<SimpleImmutableEntry<Integer, QueryMatch>> {
    private final Query query;
    private final MemorySegment cursor;
    private final Tree tree;
    private final SegmentAllocator allocator;
    private final @Nullable BiPredicate<QueryPredicate, QueryMatch> predicate;

    /**
     * Creates a captures spliterator.
     *
     * @param query     the query whose capture names and predicates are used
     * @param cursor    the native query cursor segment to advance
     * @param tree      the tree passed on to every created {@link QueryMatch}
     * @param allocator the allocator used for the native match and index buffers
     * @param predicate an optional callback forwarded to {@code query.matches}
     */
    public CapturesIterator(
            Query query,
            MemorySegment cursor,
            Tree tree,
            SegmentAllocator allocator,
            @Nullable BiPredicate<QueryPredicate, QueryMatch> predicate) {
        super(Long.MAX_VALUE, Spliterator.IMMUTABLE | Spliterator.NONNULL);
        this.query = query;
        this.cursor = cursor;
        this.tree = tree;
        this.allocator = allocator;
        this.predicate = predicate;
    }

    /**
     * Advances the cursor to the next capture that passes the query's predicates
     * and hands it to {@code action}.
     *
     * @param action receives the (capture index, match) entry
     * @return {@code true} if an entry was delivered, {@code false} once the cursor is exhausted
     */
    @Override
    public boolean tryAdvance(Consumer<? super SimpleImmutableEntry<Integer, QueryMatch>> action) {
        // When the tree has no text, predicate evaluation is bypassed entirely.
        var skipPredicates = tree.getText() == null;
        MemorySegment matchBuffer = allocator.allocate(TSQueryMatch.layout());
        MemorySegment indexBuffer = allocator.allocate(C_INT);
        var names = query.getCaptureNames();
        while (ts_query_cursor_next_capture(cursor, matchBuffer, indexBuffer)) {
            var candidate = QueryMatch.from(matchBuffer, names, tree, allocator);
            if (!skipPredicates && !query.matches(predicate, candidate)) {
                // Rejected by a predicate: keep pulling captures from the cursor.
                continue;
            }
            var entry = new SimpleImmutableEntry<>(indexBuffer.get(C_INT, 0), candidate);
            action.accept(entry);
            return true;
        }
        return false;
    }
}
28 changes: 22 additions & 6 deletions src/main/java/io/github/treesitter/jtreesitter/InputEncoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,18 @@
public enum InputEncoding {
/** UTF-8 encoding. */
UTF_8(StandardCharsets.UTF_8),
/** UTF-16 encoding. */
UTF_16(ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);
/**
* UTF-16 little endian encoding.
*
* @since 0.25.0
*/
UTF_16LE(StandardCharsets.UTF_16LE),
/**
* UTF-16 big endian encoding.
*
* @since 0.25.0
*/
UTF_16BE(StandardCharsets.UTF_16BE);

private final @NonNull Charset charset;

Expand All @@ -22,17 +32,23 @@ Charset charset() {
return charset;
}

private static final boolean IS_BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);

/**
* Convert a standard {@linkplain Charset} to an {@linkplain InputEncoding}.
*
* @param charset one of {@link StandardCharsets#UTF_8} or {@link StandardCharsets#UTF_16} ({@link StandardCharsets#UTF_16LE UTF_16LE} and {@link StandardCharsets#UTF_16BE UTF_16BE} will work too, but native byte order will be used)
* @param charset one of {@link StandardCharsets#UTF_8}, {@link StandardCharsets#UTF_16BE},
* {@link StandardCharsets#UTF_16LE}, or {@link StandardCharsets#UTF_16} (native byte order).
* @throws IllegalArgumentException If the character set is invalid.
*/
@SuppressWarnings("SameParameterValue")
static @NonNull InputEncoding valueOf(@NonNull Charset charset) throws IllegalArgumentException {
if (charset.equals(StandardCharsets.UTF_8)) return InputEncoding.UTF_8;
if (charset.equals(StandardCharsets.UTF_16BE)
|| charset.equals(StandardCharsets.UTF_16LE)
|| charset.equals(StandardCharsets.UTF_16)) return InputEncoding.UTF_16;
if (charset.equals(StandardCharsets.UTF_16BE)) return InputEncoding.UTF_16BE;
if (charset.equals(StandardCharsets.UTF_16LE)) return InputEncoding.UTF_16LE;
if (charset.equals(StandardCharsets.UTF_16)) {
return IS_BIG_ENDIAN ? InputEncoding.UTF_16BE : InputEncoding.UTF_16LE;
}
throw new IllegalArgumentException("Invalid character set: %s".formatted(charset));
}
}
76 changes: 73 additions & 3 deletions src/main/java/io/github/treesitter/jtreesitter/Language.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static io.github.treesitter.jtreesitter.internal.TreeSitter.*;

import io.github.treesitter.jtreesitter.internal.TSLanguageMetadata;
import java.lang.foreign.*;
import org.jspecify.annotations.NullMarked;
import org.jspecify.annotations.Nullable;
Expand Down Expand Up @@ -40,7 +41,7 @@ public final class Language implements Cloneable {
*/
public Language(MemorySegment self) throws IllegalArgumentException {
this.self = self.asReadOnly();
version = ts_language_version(this.self);
version = ts_language_abi_version(this.self);
if (version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION) {
throw new IllegalArgumentException(String.format(
"Incompatible language version %d. Must be between %d and %d.",
Expand Down Expand Up @@ -87,13 +88,51 @@ MemorySegment segment() {
/**
* Get the ABI version number for this language.
*
* <p>When a language is generated by the Tree-sitter CLI, it is assigned
* an ABI version number that corresponds to the current CLI version.
* <p>This version number is used to ensure that languages
* were generated by a compatible version of Tree-sitter.
*
* @return the ABI version number stored for this language
* @since 0.25.0
*/
public @Unsigned int getAbiVersion() {
return version;
}

/**
* Get the ABI version number for this language.
*
* @return the same stored version value that {@link #getAbiVersion} returns
* @deprecated Use {@link #getAbiVersion} instead.
*/
@Deprecated(since = "0.25.0", forRemoval = true)
public @Unsigned int getVersion() {
return version;
}

/**
 * Get the name of this language, if available.
 *
 * @return the language name, or {@code null} when the native call yields a null pointer
 */
public @Nullable String getName() {
    var namePointer = ts_language_name(self);
    if (namePointer.equals(MemorySegment.NULL)) {
        return null;
    }
    return namePointer.getString(0);
}

/**
 * Get the metadata for this language, if available.
 *
 * @apiNote This information is generated by the Tree-sitter
 * CLI and relies on the language author providing the correct
 * metadata in the language's {@code tree-sitter.json} file.
 *
 * @return the metadata wrapping the language's major/minor/patch version,
 *         or {@code null} when the native call yields a null pointer
 * @since 0.25.0
 */
public @Nullable LanguageMetadata getMetadata() {
    var pointer = ts_language_metadata(self);
    if (pointer.equals(MemorySegment.NULL)) {
        return null;
    }

    // Read the three version components out of the native struct.
    short majorPart = TSLanguageMetadata.major_version(pointer);
    short minorPart = TSLanguageMetadata.minor_version(pointer);
    short patchPart = TSLanguageMetadata.patch_version(pointer);
    return new LanguageMetadata(new LanguageMetadata.Version(majorPart, minorPart, patchPart));
}

/** Get the number of distinct node types in this language. */
public @Unsigned int getSymbolCount() {
return ts_language_symbol_count(self);
Expand All @@ -109,6 +148,35 @@ MemorySegment segment() {
return ts_language_field_count(self);
}

/**
 * Get all supertype symbols for the language.
 *
 * @return the supertype symbol IDs, or an empty array if the language reports none
 * @since 0.25.0
 */
public @Unsigned short[] getSupertypes() {
    try (var alloc = Arena.ofConfined()) {
        var length = alloc.allocate(C_INT.byteSize(), C_INT.byteAlignment());
        var supertypes = ts_language_supertypes(self, length);
        // The native side reports the element count as an unsigned 32-bit value,
        // so widen it without sign extension.
        long count = Integer.toUnsignedLong(length.get(C_INT, 0));
        if (count == 0 || supertypes.equals(MemorySegment.NULL)) {
            return new short[0];
        }
        // A pointer returned from a downcall does not carry the array's real size.
        // Bound the segment to exactly `count` elements before copying, otherwise
        // toArray() cannot produce the expected array.
        return supertypes.reinterpret(count * C_SHORT.byteSize()).toArray(C_SHORT);
    }
}

/**
 * Get all symbols for a given supertype symbol.
 *
 * @param supertype the supertype symbol whose subtypes are requested
 * @return the subtype symbol IDs, or an empty array if the language reports none
 * @since 0.25.0
 * @see #getSupertypes()
 */
public @Unsigned short[] getSubtypes(@Unsigned short supertype) {
    try (var alloc = Arena.ofConfined()) {
        var length = alloc.allocate(C_INT.byteSize(), C_INT.byteAlignment());
        var subtypes = ts_language_subtypes(self, supertype, length);
        // The native side reports the element count as an unsigned 32-bit value,
        // so widen it without sign extension.
        long count = Integer.toUnsignedLong(length.get(C_INT, 0));
        if (count == 0 || subtypes.equals(MemorySegment.NULL)) {
            return new short[0];
        }
        // A pointer returned from a downcall does not carry the array's real size.
        // Bound the segment to exactly `count` elements before copying, otherwise
        // toArray() cannot produce the expected array.
        return subtypes.reinterpret(count * C_SHORT.byteSize()).toArray(C_SHORT);
    }
}

/** Get the node type for the given numerical ID. */
public @Nullable String getSymbolName(@Unsigned short symbol) {
var name = ts_language_symbol_name(self, symbol);
Expand Down Expand Up @@ -187,7 +255,9 @@ public LookaheadIterator lookaheadIterator(@Unsigned short state) throws Illegal
* Create a new query from a string containing one or more S-expression patterns.
*
* @throws QueryError If an error occurred while creating the query.
* @deprecated Use the {@link Query#Query(Language, String) Query} constructor instead.
*/
@Deprecated(since = "0.25.0")
public Query query(String source) throws QueryError {
return new Query(this, source);
}
Expand Down
Loading