From b731d5952992e8dae1d91c66fee10a547d57fca8 Mon Sep 17 00:00:00 2001 From: Jan Lahoda Date: Fri, 25 Dec 2020 11:22:07 +0100 Subject: [PATCH] Support semantic highlight, introduced in specification 3.16.0 --- CMakeLists.txt | 1 + src/message_handler.cc | 2 + src/message_handler.hh | 11 + src/messages/initialize.cc | 53 +++- src/messages/textDocument_semanticToken.cc | 299 +++++++++++++++++++++ src/position.hh | 25 +- src/query.hh | 10 +- 7 files changed, 390 insertions(+), 11 deletions(-) create mode 100644 src/messages/textDocument_semanticToken.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b52b48e3..8bf06cc14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,6 +236,7 @@ target_sources(ccls PRIVATE src/messages/textDocument_hover.cc src/messages/textDocument_references.cc src/messages/textDocument_rename.cc + src/messages/textDocument_semanticToken.cc src/messages/textDocument_signatureHelp.cc src/messages/workspace.cc ) diff --git a/src/message_handler.cc b/src/message_handler.cc index 498a72deb..6546722a0 100644 --- a/src/message_handler.cc +++ b/src/message_handler.cc @@ -188,6 +188,8 @@ MessageHandler::MessageHandler() { bind("textDocument/rename", &MessageHandler::textDocument_rename); bind("textDocument/signatureHelp", &MessageHandler::textDocument_signatureHelp); bind("textDocument/typeDefinition", &MessageHandler::textDocument_typeDefinition); + bind("textDocument/semanticTokens/full", &MessageHandler::textDocument_semanticTokensFull); + bind("textDocument/semanticTokens/range", &MessageHandler::textDocument_semanticTokensRange); bind("workspace/didChangeConfiguration", &MessageHandler::workspace_didChangeConfiguration); bind("workspace/didChangeWatchedFiles", &MessageHandler::workspace_didChangeWatchedFiles); bind("workspace/didChangeWorkspaceFolders", &MessageHandler::workspace_didChangeWorkspaceFolders); diff --git a/src/message_handler.hh b/src/message_handler.hh index 7718e3467..f4149b614 100644 --- a/src/message_handler.hh +++ 
b/src/message_handler.hh @@ -48,6 +48,15 @@ struct TextDocumentPositionParam { TextDocumentIdentifier textDocument; Position position; }; +struct SemanticTokensParams { + TextDocumentIdentifier textDocument; +}; +REFLECT_STRUCT(SemanticTokensParams, textDocument); +struct SemanticTokensRangeParams { + TextDocumentIdentifier textDocument; + lsRange range; +}; +REFLECT_STRUCT(SemanticTokensRangeParams, textDocument, range); struct TextDocumentEdit { VersionedTextDocumentIdentifier textDocument; std::vector edits; @@ -287,6 +296,8 @@ private: void textDocument_rename(RenameParam &, ReplyOnce &); void textDocument_signatureHelp(TextDocumentPositionParam &, ReplyOnce &); void textDocument_typeDefinition(TextDocumentPositionParam &, ReplyOnce &); + void textDocument_semanticTokensFull(SemanticTokensParams &, ReplyOnce &); + void textDocument_semanticTokensRange(SemanticTokensRangeParams &, ReplyOnce &); void workspace_didChangeConfiguration(EmptyParam &); void workspace_didChangeWatchedFiles(DidChangeWatchedFilesParam &); void workspace_didChangeWorkspaceFolders(DidChangeWorkspaceFoldersParam &); diff --git a/src/messages/initialize.cc b/src/messages/initialize.cc index 1601db704..5ad5ef47f 100644 --- a/src/messages/initialize.cc +++ b/src/messages/initialize.cc @@ -25,6 +25,47 @@ namespace ccls { using namespace llvm; +std::vector SEMANTIC_TOKENS = { + "unknown", + + "file", + "module", + "namespace", + "package", + "class", + "method", + "property", + "field", + "constructor", + "enum", + "interface", + "function", + "variable", + "constant", + "string", + "number", + "boolean", + "array", + "object", + "key", + "null", + "enumMember", + "struct", + "event", + "operator", + "typeParameter", + "typeAlias", //252 => 27 + "parameter", + "staticMethod", + "macro" +}; + +std::vector SEMANTIC_MODIFIERS = { + "declaration", //1 + "definition", //2 + "static" //4 +}; + extern std::vector g_init_options; namespace { @@ -89,6 +130,14 @@ struct ServerCap { std::vector commands = 
{ccls_xref}; } executeCommandProvider; Config::ServerCap::Workspace workspace; + struct SemanticTokenProvider { + struct SemanticTokensLegend { + std::vector tokenTypes = SEMANTIC_TOKENS; + std::vector tokenModifiers = SEMANTIC_MODIFIERS; + } legend; + bool range = true; + bool full = true; + } semanticTokensProvider; }; REFLECT_STRUCT(ServerCap::CodeActionOptions, codeActionKinds); REFLECT_STRUCT(ServerCap::CodeLensOptions, resolveProvider); @@ -109,7 +158,9 @@ REFLECT_STRUCT(ServerCap, textDocumentSync, hoverProvider, completionProvider, documentRangeFormattingProvider, documentOnTypeFormattingProvider, renameProvider, documentLinkProvider, foldingRangeProvider, - executeCommandProvider, workspace); + executeCommandProvider, workspace, semanticTokensProvider); +REFLECT_STRUCT(ServerCap::SemanticTokenProvider, legend, range, full); +REFLECT_STRUCT(ServerCap::SemanticTokenProvider::SemanticTokensLegend, tokenTypes, tokenModifiers); struct DynamicReg { bool dynamicRegistration = false; diff --git a/src/messages/textDocument_semanticToken.cc b/src/messages/textDocument_semanticToken.cc new file mode 100644 index 000000000..c8c98543a --- /dev/null +++ b/src/messages/textDocument_semanticToken.cc @@ -0,0 +1,299 @@ +// Copyright 2017-2020 ccls Authors +// SPDX-License-Identifier: Apache-2.0 + +#include "indexer.hh" +#include "log.hh" +#include "message_handler.hh" +#include "pipeline.hh" +#include "sema_manager.hh" + +#include +#include +#include +#include + +MAKE_HASHABLE(ccls::SymbolIdx, t.usr, t.kind); + +namespace ccls { +using namespace clang; + +namespace { +struct SemanticTokens { + std::vector data; +}; +REFLECT_STRUCT(SemanticTokens, data); + +struct CclsSemanticHighlightSymbol { + using Id=int; + Id id = 0; + SymbolKind parentKind; + SymbolKind kind; + uint8_t storage; + std::vector> lsRangeAndRoles; +}; + +struct ScanLineEvent { + Position pos; + Position end_pos; // Second key when there is a tie for insertion events. 
+ using Id=int; + Id id; + CclsSemanticHighlightSymbol *symbol; + Role role; + bool operator<(const ScanLineEvent &o) const { + // See the comments below when insertion/deletion events are inserted. + if (!(pos == o.pos)) + return pos < o.pos; + if (!(o.end_pos == end_pos)) + return o.end_pos < end_pos; + // This comparison essentially orders Macro after non-Macro, + // So that macros will not be rendered as Var/Type/... + if (symbol->kind != o.symbol->kind) + return symbol->kind < o.symbol->kind; + // If symbol A and B occupy the same place, we want one to be placed + // before the other consistently. + return symbol->id < o.symbol->id; + } +}; + +} +constexpr Position documentBegin{0,0}; +constexpr Position documentEnd{ + std::numeric_limits::max(), + std::numeric_limits::max()}; + +inline std::ostream &operator<<(std::ostream &s, const Position pos) { + s + << "{line: " << pos.line + << ", end: " << pos.character; + return s; +} +inline std::ostream &operator<<(std::ostream &s, const lsRange &range) { + s + << "lsRange(start:" << range.start + << ", end:" << range.end + << ")"; + return s; +} + +void MessageHandler::textDocument_semanticTokensRange( + SemanticTokensRangeParams &param, ReplyOnce &reply) { + if(param.range.start == documentBegin && param.range.end == documentEnd) + LOG_S(INFO) + << "SemanticToken for all document"; + else + LOG_S(INFO) + << "SemanticToken for range " + << param.range.start; + std::string path = param.textDocument.uri.getPath(); + WorkingFile *wfile = wfiles->getFile(path); + if (!wfile) { + reply.notOpened(path); + return; + } + + auto [queryFile,wFile] = findOrFail(path, reply); + if (!queryFile) { + // `findOrFail` already set the reply message + return; + } + + SemanticTokens result; + + static GroupMatch match(g_config->highlight.whitelist, + g_config->highlight.blacklist); + assert(queryFile->def); + if (wfile->buffer_content.size() > g_config->highlight.largeFileSize || + !match.matches(queryFile->def->path)) { + LOG_S(INFO) << 
"Not SemTokenizing " << path << "because of allowlist/denylist"; + return; + } + + // Group symbols together. + std::unordered_map grouped_symbols; + for (auto [sym, refcnt] : queryFile->symbol2refcnt) { + if (refcnt <= 0) + continue; + // skip symbols that don't intersect range + if( sym.range.end.line < param.range.start.line + || sym.range.start.line > param.range.end.line + // range is within lines here below, let's test if within specified characters/columns + || sym.range.end.column < param.range.start.character + || sym.range.start.column > param.range.end.character) + continue; + std::string_view detailed_name; + SymbolKind parent_kind = SymbolKind::Unknown; + SymbolKind kind = SymbolKind::Unknown; + uint8_t storage = SC_None; + DB::UsrIndex idx; + // This switch statement also filters out symbols that are not highlighted. + switch (sym.kind) { + case Kind::Func: { + idx = db->func_usr[sym.usr]; + const QueryFunc &func = db->funcs[idx]; + const QueryFunc::Def *def = func.anyDef(); + if (!def) + continue; // applies to for loop + // Don't highlight overloadable operators or implicit lambda -> + // std::function constructor. + const auto short_name = def->name(false); + if (short_name.compare(0, 8, "operator") == 0) + continue; // applies to for loop + kind = def->kind; + storage = def->storage; + detailed_name = short_name; + parent_kind = def->parent_kind; + + // Check whether the function name is actually there. + // If not, do not publish the semantic highlight. + // E.g. copy-initialization of constructors should not be highlighted + // but we still want to keep the range for jumping to definition. + const auto concise_name = + detailed_name.substr(0, detailed_name.find('<')); + const auto start_line_idx = sym.range.start.line; + const auto start_col = sym.range.start.column; + if (start_line_idx >= wfile->index_lines.size()) // out-of-range ? 
+ continue; + const auto line = wfile->index_lines[start_line_idx]; + sym.range.end.line = start_line_idx; + if (!(start_col + concise_name.size() <= line.size() && + line.compare(start_col, concise_name.size(), concise_name) == 0)) + continue; + sym.range.end.column = start_col + concise_name.size(); + break; + } + case Kind::Type: { + idx = db->type_usr[sym.usr]; + const QueryType &type = db->types[idx]; + for (auto &def : type.def) { + kind = def.kind; + detailed_name = def.detailed_name; + if (def.spell) { + parent_kind = def.parent_kind; + break; + } + } + break; + } + case Kind::Var: { + idx = db->var_usr[sym.usr]; + const QueryVar &var = db->vars[idx]; + for (auto &def : var.def) { + kind = def.kind; + storage = def.storage; + detailed_name = def.detailed_name; + if (def.spell) { + parent_kind = def.parent_kind; + break; + } + } + break; + } + default: + continue; // applies to for loop + } + + if (auto maybe_loc = getLsRange(wfile, sym.range)) { + auto it = grouped_symbols.find(sym); + const auto &loc = *maybe_loc; + if (it != grouped_symbols.end()) { + it->second.lsRangeAndRoles.push_back({loc, sym.role}); + } else { + CclsSemanticHighlightSymbol symbol; + symbol.id = idx; + symbol.parentKind = parent_kind; + symbol.kind = kind; + symbol.storage = storage; + symbol.lsRangeAndRoles.push_back({loc, sym.role}); + grouped_symbols[sym] = symbol; + } + } + } + + // Make ranges non-overlapping using a scan line algorithm. + std::vector events; + ScanLineEvent::Id id = 0; + for (auto &entry : grouped_symbols) { + CclsSemanticHighlightSymbol &symbol = entry.second; + for (auto &loc : symbol.lsRangeAndRoles) { + // For ranges sharing the same start point, the one with leftmost end + // point comes first. + events.push_back({loc.first.start, loc.first.end, id, &symbol, loc.second}); + // For ranges sharing the same end point, their relative order does not + // matter, therefore we arbitrarily assign loc.end to them. 
We use + // negative id to indicate a deletion event. + events.push_back({loc.first.end, loc.first.end, ~id, &symbol, loc.second}); + id++; + } + symbol.lsRangeAndRoles.clear(); + } + std::sort(events.begin(), events.end()); + + std::vector deleted(id, 0); + int top = 0; + for (size_t i = 0; i < events.size(); i++) { + while (top && deleted[events[top - 1].id]) + top--; + // Order [a, b0) after [a, b1) if b0 < b1. The range that comes later overrides + // the earlier. The order of [a0, b) [a1, b) does not matter. + // The order of [a, b) [b, c) does not matter as long as we do not emit empty + // ranges. + // Attribute range [events[i-1].pos, events[i].pos) to events[top-1].symbol + // . + if (top && !(events[i - 1].pos == events[i].pos)) + events[top - 1].symbol->lsRangeAndRoles.push_back( + {{events[i - 1].pos, events[i].pos}, events[i].role}); + if (events[i].id >= 0) + events[top++] = events[i]; + else + deleted[~events[i].id] = 1; + } + + // Transform lsRange into pair (offset pairs) + std::vector, CclsSemanticHighlightSymbol *>> scratch; + for (auto &entry : grouped_symbols) { + for (auto &range : entry.second.lsRangeAndRoles) + scratch.emplace_back(range, &entry.second); + entry.second.lsRangeAndRoles.clear(); + } + std::sort(scratch.begin(), scratch.end(), + [](auto &l, auto &r) { return l.first.first.start < r.first.first.start; }); + int line = 0; + int column = 0; + for (auto &entry : scratch) { + lsRange &r = entry.first.first; + if (r.start.line != line) { + column = 0; + } + result.data.push_back(r.start.line - line); line = r.start.line; + result.data.push_back(r.start.character - column); column = r.start.character; + result.data.push_back(r.end.character - r.start.character); + uint8_t kindId; + int modifiers = entry.second->storage == SC_Static ? 
4 : 0; + if (entry.first.second & Role::Declaration) { + modifiers |= 1; + } + if (entry.first.second & Role::Definition) { + modifiers |= 2; + } + if (entry.second->kind == SymbolKind::StaticMethod) { + kindId = (uint8_t) SymbolKind::Method; + modifiers = 4; + } else { + kindId = (uint8_t) entry.second->kind; + if (kindId > (uint8_t) SymbolKind::StaticMethod) + kindId--; + if (kindId >= 252) kindId = 27 + kindId - 252; + } + result.data.push_back(kindId); + result.data.push_back(modifiers); + } + + reply(result); +} +void MessageHandler::textDocument_semanticTokensFull( + SemanticTokensParams &param, ReplyOnce &reply){ + lsRange fullRange{documentBegin, documentEnd}; + SemanticTokensRangeParams fullRangeParameters{param.textDocument, fullRange}; + textDocument_semanticTokensRange(fullRangeParameters, reply); +} +} // namespace ccls diff --git a/src/position.hh b/src/position.hh index e59804d2b..a227ae263 100644 --- a/src/position.hh +++ b/src/position.hh @@ -10,8 +10,10 @@ namespace ccls { struct Pos { - uint16_t line = 0; - int16_t column = -1; + using Line=uint16_t; + Line line = 0; + using Column=int16_t; + Column column = -1; static Pos fromString(const std::string &encoded); @@ -21,14 +23,23 @@ struct Pos { // Compare two Positions and check if they are equal. Ignores the value of // |interesting|. bool operator==(const Pos &o) const { - return line == o.line && column == o.column; + return asTuple() == o.asTuple(); } bool operator<(const Pos &o) const { - if (line != o.line) - return line < o.line; - return column < o.column; + return asTuple() < o.asTuple(); } - bool operator<=(const Pos &o) const { return !(o < *this); } + bool operator<=(const Pos &o) const { + return asTuple() <= o.asTuple(); + } +protected: + /*! 
+ * (line, column) + * used for lexicographic comparison + */ + auto asTuple() const -> std::tuple { + return std::make_tuple(line, column); + } + }; struct Range { diff --git a/src/query.hh b/src/query.hh index cd3a19c17..37dbd9c3e 100644 --- a/src/query.hh +++ b/src/query.hh @@ -42,8 +42,9 @@ struct QueryFile { int id = -1; std::optional def; - // `extent` is valid => declaration; invalid => regular reference - llvm::DenseMap symbol2refcnt; + //! `extent` is valid => declaration; invalid => regular reference + using SymbolToRefCount=llvm::DenseMap; + SymbolToRefCount symbol2refcnt; }; template struct QueryEntity { @@ -146,7 +147,10 @@ using Lid2file_id = std::unordered_map; struct DB { std::vector files; llvm::StringMap name2file_id; - llvm::DenseMap func_usr, type_usr, var_usr; + //! Underlying type used for indexes-of-Usr + using UsrIndex=int; + //! Usr → index + llvm::DenseMap func_usr, type_usr, var_usr; llvm::SmallVector funcs; llvm::SmallVector types; llvm::SmallVector vars;