Skip to content

Commit

Permalink
Change implicit import handling to be namespace-oriented. (#4089)
Browse files Browse the repository at this point in the history
This refactors how the implicit import is handled in order to retain
more name scope information. As a consequence, private access control
works better between api files and implementation files. Note though
that this will also be essential for name poisoning between the API and
implementation, as discussed in #3763.

In implementing this, I ran into a couple issues with namespaces that I
think point to flaws in their handling. I've fixed some and added a TODO
for the biggest issue (in check.cpp line 281-288), which relates to the
handling of namespaces of direct imports which are first evaluated
indirectly.
  • Loading branch information
jonmeow authored Jun 28, 2024
1 parent 10a198a commit 3f78e1d
Show file tree
Hide file tree
Showing 16 changed files with 1,038 additions and 214 deletions.
162 changes: 135 additions & 27 deletions toolchain/check/check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,37 @@ struct UnitInfo {
};
} // namespace

// Collects transitive imports, handling deduplication.
static auto CollectTransitiveImports(const UnitInfo::PackageImports& imports,
int total_ir_count)
// Collects direct imports, for CollectTransitiveImports.
static auto CollectDirectImports(llvm::SmallVector<SemIR::ImportIR>& results,
llvm::MutableArrayRef<int> ir_to_result_index,
const UnitInfo::PackageImports& imports,
bool is_local) -> void {
for (const auto& import : imports.imports) {
const auto& direct_ir = **import.unit_info->unit->sem_ir;
auto& index = ir_to_result_index[direct_ir.check_ir_id().index];
if (index != -1) {
// This should only happen when doing API imports for an implementation
// file. Don't change the entry; is_export doesn't matter.
continue;
}
index = results.size();
results.push_back(
// TODO: For !is_local, should this use something valid that points at
// the API's import? That would probably require something other than a
// node_id, such as a LocId.
{.node_id = is_local ? import.names.node_id : Parse::InvalidNodeId(),
.sem_ir = &direct_ir,
// Only tag exports in API files, ignoring the value in implementation
// files.
.is_export = is_local && import.names.is_export});
}
}

// Collects transitive imports, handling deduplication. These will be unified
// between local_imports and api_imports.
static auto CollectTransitiveImports(
const UnitInfo::PackageImports* local_imports,
const UnitInfo::PackageImports* api_imports, int total_ir_count)
-> llvm::SmallVector<SemIR::ImportIR> {
llvm::SmallVector<SemIR::ImportIR> results;

Expand All @@ -110,12 +138,13 @@ static auto CollectTransitiveImports(const UnitInfo::PackageImports& imports,

// First add direct imports. This means that if an entity is imported both
// directly and indirectly, the import path will reflect the direct import.
for (const auto& import : imports.imports) {
const auto& direct_ir = **import.unit_info->unit->sem_ir;
ir_to_result_index[direct_ir.check_ir_id().index] = results.size();
results.push_back({.node_id = import.names.node_id,
.sem_ir = &direct_ir,
.is_export = import.names.is_export});
if (local_imports) {
CollectDirectImports(results, ir_to_result_index, *local_imports,
/*is_local=*/true);
}
if (api_imports) {
CollectDirectImports(results, ir_to_result_index, *api_imports,
/*is_local=*/false);
}

// Loop through direct imports for any indirect exports. The underlying vector
Expand Down Expand Up @@ -171,14 +200,106 @@ static auto ImportCurrentPackage(Context& context, UnitInfo& unit_info,

ImportLibrariesFromCurrentPackage(
context, namespace_type_id,
CollectTransitiveImports(self_import, total_ir_count));
CollectTransitiveImports(&self_import, /*api_imports=*/nullptr,
total_ir_count));

context.scope_stack().Push(
package_inst_id, SemIR::NameScopeId::Package,
context.name_scopes().Get(SemIR::NameScopeId::Package).has_error);
}

// Imports all other packages (excluding the current package).
static auto ImportOtherPackages(Context& context, UnitInfo& unit_info,
int total_ir_count,
SemIR::TypeId namespace_type_id) -> void {
// api_imports_list is initially the size of the current file's imports,
// including for API files, for simplicity in iteration. It's only really used
// when processing an implementation file, in order to combine the API file
// imports.
//
// For packages imported by the API file, the IdentifierId is the package name
// and the index is into the API's import list. Otherwise, the initial
// {Invalid, -1} state remains.
llvm::SmallVector<std::pair<IdentifierId, int32_t>> api_imports_list;
api_imports_list.resize(unit_info.package_imports.size(),
{IdentifierId::Invalid, -1});

// When there's an API file, add the mapping to api_imports_list.
if (unit_info.api_for_impl) {
const auto& api_identifiers =
unit_info.api_for_impl->unit->value_stores->identifiers();
auto& impl_identifiers = unit_info.unit->value_stores->identifiers();

for (auto [api_imports_index, api_imports] :
llvm::enumerate(unit_info.api_for_impl->package_imports)) {
// Skip the current package.
if (!api_imports.package_id.is_valid()) {
continue;
}
// Translate the package ID from the API file to the implementation file.
auto impl_package_id =
impl_identifiers.Add(api_identifiers.Get(api_imports.package_id));
if (auto it = unit_info.package_imports_map.find(impl_package_id);
it != unit_info.package_imports_map.end()) {
// On a hit, replace the entry to unify the API and implementation
// imports.
api_imports_list[it->second] = {impl_package_id, api_imports_index};
} else {
// On a miss, add the package as API-only.
api_imports_list.push_back({impl_package_id, api_imports_index});
}
}
}

for (auto [i, api_imports_entry] : llvm::enumerate(api_imports_list)) {
// These variables are updated after figuring out which imports are present.
Parse::ImportDeclId node_id = Parse::InvalidNodeId();
IdentifierId package_id = IdentifierId::Invalid;
bool has_load_error = false;

// Identify the local package imports if present.
const UnitInfo::PackageImports* local_imports = nullptr;
if (i < unit_info.package_imports.size()) {
local_imports = &unit_info.package_imports[i];
if (!local_imports->package_id.is_valid()) {
// Skip the current package.
continue;
}
node_id = local_imports->node_id;
package_id = local_imports->package_id;
has_load_error |= local_imports->has_load_error;
}

// Identify the API package imports if present.
const UnitInfo::PackageImports* api_imports = nullptr;
if (api_imports_entry.second != -1) {
api_imports =
&unit_info.api_for_impl->package_imports[api_imports_entry.second];
if (!package_id.is_valid()) {
package_id = api_imports_entry.first;
} else {
CARBON_CHECK(package_id == api_imports_entry.first);
}
has_load_error |= api_imports->has_load_error;
}

// Do the actual import.
ImportLibrariesFromOtherPackage(
context, namespace_type_id, node_id, package_id,
CollectTransitiveImports(local_imports, api_imports, total_ir_count),
has_load_error);
}
}

// Add imports to the root block.
// TODO: Should this be importing all namespaces before anything else, including
// for imports from other packages? Otherwise, name conflict resolution
// involving something such as `fn F() -> Other.NS.A`, wherein `Other.NS`
// hasn't been imported yet (before `Other.NS` had a constant assigned), could
// result in an inconsistent scoping for `Other.NS.A` versus if it were imported
// as part of scanning `Other.NS` (after `Other.NS` had a constant assigned).
// This will probably require IRs separating out which namespaces they
// declare (or declare entities inside).
static auto InitPackageScopeAndImports(Context& context, UnitInfo& unit_info,
int total_ir_count) -> void {
// First create the constant values map for all imported IRs. We'll populate
Expand Down Expand Up @@ -217,10 +338,9 @@ static auto InitPackageScopeAndImports(Context& context, UnitInfo& unit_info,
// If there is an implicit `api` import, set it first so that it uses the
// ImportIRId::ApiForImpl when processed for imports.
if (unit_info.api_for_impl) {
SetApiImportIR(
context,
{.node_id = context.parse_tree().packaging_decl()->names.node_id,
.sem_ir = &**unit_info.api_for_impl->unit->sem_ir});
auto node_id = context.parse_tree().packaging_decl()->names.node_id;
const auto& api_sem_ir = **unit_info.api_for_impl->unit->sem_ir;
ImportApiFile(context, namespace_type_id, node_id, api_sem_ir);
} else {
SetApiImportIR(context,
{.node_id = Parse::InvalidNodeId(), .sem_ir = nullptr});
Expand All @@ -229,19 +349,7 @@ static auto InitPackageScopeAndImports(Context& context, UnitInfo& unit_info,
ImportCurrentPackage(context, unit_info, total_ir_count, package_inst_id,
namespace_type_id);
CARBON_CHECK(context.scope_stack().PeekIndex() == ScopeIndex::Package);

for (auto& package_imports : unit_info.package_imports) {
if (!package_imports.package_id.is_valid()) {
// Current package is handled above.
continue;
}

ImportLibrariesFromOtherPackage(
context, namespace_type_id, package_imports.node_id,
package_imports.package_id,
CollectTransitiveImports(package_imports, total_ir_count),
package_imports.has_load_error);
}
ImportOtherPackages(context, unit_info, total_ir_count, namespace_type_id);
}

namespace {
Expand Down
98 changes: 93 additions & 5 deletions toolchain/check/import.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ static auto CacheCopiedNamespace(

// Copies a namespace from the import IR, returning its ID. This may diagnose
// name conflicts, but that won't change the result because namespaces supersede
// other names in conflicts.
// other names in conflicts. copied_namespaces is optional.
static auto CopySingleNameScopeFromImportIR(
Context& context, SemIR::TypeId namespace_type_id,
llvm::DenseMap<SemIR::NameScopeId, SemIR::NameScopeId>& copied_namespaces,
llvm::DenseMap<SemIR::NameScopeId, SemIR::NameScopeId>* copied_namespaces,
SemIR::ImportIRId ir_id, SemIR::InstId import_inst_id,
SemIR::NameScopeId import_scope_id, SemIR::NameScopeId parent_scope_id,
SemIR::NameId name_id) -> SemIR::NameScopeId {
Expand All @@ -172,7 +172,10 @@ static auto CopySingleNameScopeFromImportIR(
context.import_ir_constant_values()[ir_id.index].Set(import_inst_id,
namespace_const_id);

CacheCopiedNamespace(copied_namespaces, import_scope_id, namespace_scope_id);
if (copied_namespaces) {
CacheCopiedNamespace(*copied_namespaces, import_scope_id,
namespace_scope_id);
}
return namespace_scope_id;
}

Expand Down Expand Up @@ -219,7 +222,7 @@ static auto CopyAncestorNameScopesFromImportIR(
auto name_id =
CopyNameFromImportIR(context, import_sem_ir, import_scope.name_id);
scope_cursor = CopySingleNameScopeFromImportIR(
context, namespace_type_id, copied_namespaces, ir_id,
context, namespace_type_id, &copied_namespaces, ir_id,
import_scope.inst_id, import_scope_id, scope_cursor, name_id);
}

Expand Down Expand Up @@ -257,6 +260,91 @@ static auto AddImportRefOrMerge(Context& context, SemIR::ImportIRId ir_id,
&import_sem_ir, import_inst_id);
}

namespace {
// A scope in the API file that still needs to be copied to the implementation
// file. Only used for API file imports.
struct TodoScope {
// The scope's instruction in the API file.
SemIR::InstId api_inst_id;
// The scope in the API file.
SemIR::NameScopeId api_scope_id;
// The already-translated scope name in the implementation file.
SemIR::NameId impl_name_id;
// The already-copied parent scope in the implementation file.
SemIR::NameScopeId impl_parent_scope_id;
};
} // namespace

// Imports entries in a specific scope into the current file.
static auto ImportScopeFromApiFile(Context& context,
const SemIR::File& api_sem_ir,
SemIR::NameScopeId api_scope_id,
SemIR::NameScopeId impl_scope_id,
llvm::SmallVector<TodoScope>& todo_scopes)
-> void {
const auto& api_scope = api_sem_ir.name_scopes().Get(api_scope_id);
auto& impl_scope = context.name_scopes().Get(impl_scope_id);

for (const auto& api_entry : api_scope.names) {
auto impl_name_id =
CopyNameFromImportIR(context, api_sem_ir, api_entry.name_id);
if (auto ns =
api_sem_ir.insts().TryGetAs<SemIR::Namespace>(api_entry.inst_id)) {
// Ignore cross-package imports. These will be handled through
// ImportLibrariesFromOtherPackage.
if (api_scope_id == SemIR::NameScopeId::Package) {
const auto& ns_scope = api_sem_ir.name_scopes().Get(ns->name_scope_id);
if (!ns_scope.import_ir_scopes.empty()) {
continue;
}
}

// Namespaces will be recursed into. Name scope creation is delayed in
// order to avoid invalidating api_scope/impl_scope.
todo_scopes.push_back({.api_inst_id = api_entry.inst_id,
.api_scope_id = ns->name_scope_id,
.impl_name_id = impl_name_id,
.impl_parent_scope_id = impl_scope_id});
} else {
// Add an ImportRef for other instructions.
auto impl_bind_name_id = context.bind_names().Add(
{.name_id = impl_name_id,
.parent_scope_id = impl_scope_id,
.bind_index = SemIR::CompileTimeBindIndex::Invalid});
auto import_ref_id = AddImportRef(context,
{.ir_id = SemIR::ImportIRId::ApiForImpl,
.inst_id = api_entry.inst_id},
impl_bind_name_id);
impl_scope.AddRequired({.name_id = impl_name_id,
.inst_id = import_ref_id,
.access_kind = api_entry.access_kind});
}
}
}

auto ImportApiFile(Context& context, SemIR::TypeId namespace_type_id,
Parse::ImportDeclId node_id, const SemIR::File& api_sem_ir)
-> void {
SetApiImportIR(context, {.node_id = node_id, .sem_ir = &api_sem_ir});
context.import_ir_constant_values()[SemIR::ImportIRId::ApiForImpl.index].Set(
SemIR::InstId::PackageNamespace,
context.constant_values().Get(SemIR::InstId::PackageNamespace));

llvm::SmallVector<TodoScope> todo_scopes = {};
ImportScopeFromApiFile(context, api_sem_ir, SemIR::NameScopeId::Package,
SemIR::NameScopeId::Package, todo_scopes);
while (!todo_scopes.empty()) {
auto todo_scope = todo_scopes.pop_back_val();
auto impl_scope_id = CopySingleNameScopeFromImportIR(
context, namespace_type_id, /*copied_namespaces=*/nullptr,
SemIR::ImportIRId::ApiForImpl, todo_scope.api_inst_id,
todo_scope.api_scope_id, todo_scope.impl_parent_scope_id,
todo_scope.impl_name_id);
ImportScopeFromApiFile(context, api_sem_ir, todo_scope.api_scope_id,
impl_scope_id, todo_scopes);
}
}

auto ImportLibrariesFromCurrentPackage(
Context& context, SemIR::TypeId namespace_type_id,
llvm::ArrayRef<SemIR::ImportIR> import_irs) -> void {
Expand Down Expand Up @@ -285,7 +373,7 @@ auto ImportLibrariesFromCurrentPackage(
// Namespaces are always imported because they're essential for
// qualifiers, and the type is simple.
CopySingleNameScopeFromImportIR(
context, namespace_type_id, copied_namespaces, ir_id,
context, namespace_type_id, &copied_namespaces, ir_id,
import_inst_id, import_namespace_inst->name_scope_id,
parent_scope_id, name_id);
} else {
Expand Down
8 changes: 8 additions & 0 deletions toolchain/check/import.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@

namespace Carbon::Check {

// Imports the API file's name lookup information into a corresponding
// implementation file. Only information for the current package will be copied;
// information for other packages should be handled through
// ImportLibrariesFromOtherPackage.
auto ImportApiFile(Context& context, SemIR::TypeId namespace_type_id,
Parse::ImportDeclId node_id, const SemIR::File& api_sem_ir)
-> void;

// Add the current package's imports to name lookup. This pulls in all names;
// conflicts for things such as `package.a.b.c` will be flagged even though they
// are several layers deep.
Expand Down
Loading

0 comments on commit 3f78e1d

Please sign in to comment.