From aa644914fe7c03a88511f07955d18b70ec587d83 Mon Sep 17 00:00:00 2001
From: Rui Ueyama
Date: Fri, 28 Jul 2023 21:06:12 +0900
Subject: [PATCH] Merge .riscv.attributes input sections

---
 elf/arch-riscv.cc              | 247 ++++++++++++++++++++++++++++++++-
 elf/elf.h                      |  11 ++
 elf/input-files.cc             |  68 ++++++++-
 elf/mold.h                     |  26 ++++
 elf/output-chunks.cc           |  10 +-
 elf/passes.cc                  |   2 +
 elf/relocatable.cc             |   3 +
 test/elf/riscv64_attributes.sh |  20 +++
 8 files changed, 384 insertions(+), 3 deletions(-)
 create mode 100755 test/elf/riscv64_attributes.sh

diff --git a/elf/arch-riscv.cc b/elf/arch-riscv.cc
index 73bef2c287..2809a2317f 100644
--- a/elf/arch-riscv.cc
+++ b/elf/arch-riscv.cc
@@ -929,6 +929,317 @@ i64 riscv_resize_sections(Context<E> &ctx) {
   return set_osec_offsets(ctx);
 }
 
+// ISA name handlers
+//
+// An example of ISA name is "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0".
+// An ISA name starts with the base name (e.g. "rv64i2p1") followed by
+// ISA extensions separated by underscores.
+//
+// There are lots of ISA extensions defined for RISC-V, and they are
+// identified by name. Some extensions have a single-letter name such
+// as "m" or "q". Newer extension names start with "z" followed by one or
+// more letters (e.g. "zicsr"). "s" and "x" prefixes are reserved
+// for supervisor-level extensions and private extensions, respectively.
+//
+// Each extension consists of a name, a major version and a minor version.
+// For example, "m2p0" indicates the "m" extension of version 2.0. "p" is
+// just a separator.
+//
+// Each RISC-V object file contains an ISA string enumerating extensions
+// used by the object file. We need to merge input objects' ISA strings
+// into a single ISA string.
+//
+// In order to guarantee string uniqueness, extensions have to be ordered
+// in a specific manner. The exact rule is unfortunately a bit complicated.
+//
+// The following functions take care of ISA strings.
+
+// A single ISA extension (or the base ISA) and its version number.
+struct Extn {
+  std::string_view name;
+  i64 major;
+  i64 minor;
+};
+
+// As per the RISC-V spec, the extension names must be sorted in a very
+// specific way, and unfortunately that's not just an alphabetical order.
+// For example, rv64imafd is a legal ISA string, whereas rv64iafdm is not.
+// The exact rule is somewhat arbitrary.
+//
+// This function returns true if the first extension name should precede
+// the second one as per the rule.
+static bool extn_name_less(const Extn &e1, const Extn &e2) {
+  // Single-letter names are ordered by their positions in the canonical
+  // list. Letters not in the list follow it in alphabetical order.
+  auto get_single_letter_rank = [](char c) -> i64 {
+    std::string_view exts = "iemafdqlcbkjtpvnh";
+    size_t pos = exts.find_first_of(c);
+    if (pos != exts.npos)
+      return pos;
+    return c - 'a' + exts.size();
+  };
+
+  // Single-letter names come first, followed by "z", "s" and "x" names
+  // in that order. "z" names are further grouped by their second letter.
+  auto get_rank = [&](std::string_view str) -> i64 {
+    if (str.empty())
+      return -1;
+
+    switch (str[0]) {
+    case 'x':
+      return 1 << 20;
+    case 's':
+      return 1 << 19;
+    case 'z':
+      // A bare "z" has no second letter to look at.
+      if (str.size() == 1)
+        return 1 << 18;
+      return (1 << 18) + get_single_letter_rank(str[1]);
+    default:
+      return get_single_letter_rank(str[0]);
+    }
+  };
+
+  return std::tuple{get_rank(e1.name), e1.name} <
+         std::tuple{get_rank(e2.name), e2.name};
+}
+
+// Returns true if e1's version is older than e2's.
+static bool extn_version_less(const Extn &e1, const Extn &e2) {
+  return std::tuple{e1.major, e1.minor} <
+         std::tuple{e2.major, e2.minor};
+}
+
+// Reads a decimal number from the beginning of `str` and removes it from
+// `str`. Returns nothing if `str` does not start with a digit or if the
+// number is unreasonably large. Unlike std::stoul, this function never
+// throws and accepts neither a sign nor leading whitespace.
+static std::optional<i64> read_version_number(std::string_view &str) {
+  auto is_digit = [](char c) { return '0' <= c && c <= '9'; };
+  if (str.empty() || !is_digit(str[0]))
+    return {};
+
+  i64 val = 0;
+  while (!str.empty() && is_digit(str[0])) {
+    val = val * 10 + (str[0] - '0');
+    if (val > 0x7fffffff)
+      return {};
+    str = str.substr(1);
+  }
+  return val;
+}
+
+// Reads an extension string consisting of a name, a major version, "p"
+// and a minor version (e.g. "zicsr2p0") from the beginning of `str` and
+// removes it from `str`. Returns nothing if `str` is malformed.
+static std::optional<Extn> read_extn_string(std::string_view &str) {
+  // A name is everything up to the first digit. It must not be empty
+  // because extn_name_less() looks at its first letter.
+  size_t pos = str.find_first_of("0123456789");
+  if (pos == str.npos || pos == 0)
+    return {};
+
+  Extn extn;
+  extn.name = str.substr(0, pos);
+  str = str.substr(pos);
+
+  std::optional<i64> major = read_version_number(str);
+  if (!major || str.size() < 2 || str[0] != 'p')
+    return {};
+  str = str.substr(1);
+
+  std::optional<i64> minor = read_version_number(str);
+  if (!minor)
+    return {};
+
+  extn.major = *major;
+  extn.minor = *minor;
+
+  // An extension must be followed by either the end of the string or
+  // a separator.
+  if (str.empty() || str[0] == '_')
+    return extn;
+  return {};
+}
+
+// Parses an ISA string such as "rv64i2p1_m2p0_a2p1" into a list of
+// extensions. The first element represents the base ISA and is named
+// like "rv64i". Returns an empty vector if `str` is malformed.
+static std::vector<Extn> parse_arch_string(std::string_view str) {
+  if (str.size() < 5)
+    return {};
+
+  // Parse the base part
+  std::string_view base = str.substr(0, 5);
+  if (base != "rv32i" && base != "rv32e" && base != "rv64i" &&
+      base != "rv64e")
+    return {};
+
+  // Skip "rv32" or "rv64" so that the rest starts with a string that
+  // looks like a regular extension (e.g. "i2p1").
+  str = str.substr(4);
+
+  std::optional<Extn> extn = read_extn_string(str);
+  if (!extn)
+    return {};
+
+  std::vector<Extn> vec;
+  extn->name = base;
+  vec.push_back(*extn);
+
+  // Parse extensions
+  while (!str.empty()) {
+    if (str[0] != '_')
+      return {};
+    str = str.substr(1);
+
+    std::optional<Extn> extn = read_extn_string(str);
+    if (!extn)
+      return {};
+    vec.push_back(*extn);
+  }
+  return vec;
+}
+
+// Merges two lists of extensions into one. If an extension is in both
+// lists, the one with the newer version is chosen. Returns an empty
+// vector if the two lists are not compatible.
+static std::vector<Extn>
+merge_extensions(std::span<Extn> x, std::span<Extn> y) {
+  // The base part (e.g. "rv64i" or "rv32i") must exist and match.
+  if (x.empty() || y.empty() || x[0].name != y[0].name)
+    return {};
+
+  std::vector<Extn> vec;
+
+  // Merge ISA extension strings
+  while (!x.empty() && !y.empty()) {
+    if (x[0].name == y[0].name) {
+      vec.push_back(extn_version_less(x[0], y[0]) ? y[0] : x[0]);
+      x = x.subspan(1);
+      y = y.subspan(1);
+    } else if (extn_name_less(x[0], y[0])) {
+      vec.push_back(x[0]);
+      x = x.subspan(1);
+    } else {
+      vec.push_back(y[0]);
+      y = y.subspan(1);
+    }
+  }
+
+  vec.insert(vec.end(), x.begin(), x.end());
+  vec.insert(vec.end(), y.begin(), y.end());
+  return vec;
+}
+
+// Converts a list of extensions back to an ISA string.
+static std::string to_string(std::span<Extn> v) {
+  std::string str;
+  for (const Extn &e : v) {
+    if (!str.empty())
+      str += '_';
+    str += e.name;
+    str += std::to_string(e.major) + "p" + std::to_string(e.minor);
+  }
+  return str;
+}
+
+//
+// Output .riscv.attributes class
+//
+
+// Computes the contents of the output .riscv.attributes section by
+// merging the attributes of all input object files. If no input file
+// has a valid ISA string, the section is left empty.
+template <typename E> requires is_riscv<E>
+void RiscvAttributesSection<E>::update_shdr(Context<E> &ctx) {
+  // The contents need to be computed only once.
+  if (!contents.empty())
+    return;
+
+  i64 stack = -1;
+  std::vector<Extn> arch;
+  bool unaligned = false;
+
+  for (ObjectFile<E> *file : ctx.objs) {
+    if (file->extra.stack_align) {
+      i64 val = *file->extra.stack_align;
+      if (stack != -1 && stack != val)
+        Error(ctx) << *file << ": stack alignment requirement mismatch";
+      stack = val;
+    }
+
+    if (file->extra.arch) {
+      std::vector<Extn> arch2 = parse_arch_string(*file->extra.arch);
+
+      if (arch2.empty()) {
+        // Do not merge a broken ISA string into the result.
+        Error(ctx) << *file << ": corrupted .riscv.attributes ISA string: "
+                   << *file->extra.arch;
+      } else if (arch.empty()) {
+        arch = arch2;
+      } else {
+        std::vector<Extn> merged = merge_extensions(arch, arch2);
+        if (merged.empty()) {
+          Error(ctx) << *file
+                     << ": incompatible .riscv.attributes ISA string: "
+                     << *file->extra.arch;
+        } else {
+          arch = merged;
+        }
+      }
+    }
+
+    if (file->extra.unaligned_access.value_or(false))
+      unaligned = true;
+  }
+
+  if (arch.empty())
+    return;
+
+  std::string arch_str = to_string(arch);
+
+  // Reserve a buffer that is large enough for the ISA string and the
+  // other fields. It is shrunk to the actual size at the end.
+  contents.resize(arch_str.size() + 100);
+
+  u8 *p = (u8 *)contents.data();
+  *p++ = 'A';                             // Format version
+  U32<E> *sub_sz = (U32<E> *)p;           // Sub-section length
+  p += 4;
+  p += write_string(p, "riscv");          // Vendor name
+  u8 *sub_sub_start = p;
+  *p++ = ELF_TAG_FILE;                    // Sub-section tag
+  U32<E> *sub_sub_sz = (U32<E> *)p;       // Sub-sub-section length
+  p += 4;
+
+  if (stack != -1) {
+    p += write_uleb(p, ELF_TAG_RISCV_STACK_ALIGN);
+    p += write_uleb(p, stack);
+  }
+
+  p += write_uleb(p, ELF_TAG_RISCV_ARCH);
+  p += write_string(p, arch_str);
+
+  if (unaligned) {
+    p += write_uleb(p, ELF_TAG_RISCV_UNALIGNED_ACCESS);
+    p += write_uleb(p, 1);
+  }
+
+  // The sub-section length covers everything but the format version.
+  i64 sz = p - (u8 *)contents.data();
+  *sub_sz = sz - 1;
+  *sub_sub_sz = p - sub_sub_start;
+  contents.resize(sz);
+  this->shdr.sh_size = sz;
+}
+
+// Writes the contents computed by update_shdr() to the output file.
+template <typename E> requires is_riscv<E>
+void RiscvAttributesSection<E>::copy_buf(Context<E> &ctx) {
+  memcpy(ctx.buf + this->shdr.sh_offset, contents.data(), contents.size());
+}
+
 #define INSTANTIATE(E)                                                       \
   template void write_plt_header(Context<E> &, u8 *);                        \
   template void write_plt_entry(Context<E> &, u8 *, Symbol<E> &);            \
@@ -939,7 +1250,8 @@ i64 riscv_resize_sections(Context<E> &ctx) {
   template void InputSection<E>::apply_reloc_nonalloc(Context<E> &, u8 *);   \
   template void InputSection<E>::copy_contents_riscv(Context<E> &, u8 *);    \
   template void InputSection<E>::scan_relocations(Context<E> &);             \
-  template i64 riscv_resize_sections(Context<E> &);
+  template i64 riscv_resize_sections(Context<E> &);                          \
+  template class RiscvAttributesSection<E>;
 
 INSTANTIATE(RV64LE);
 INSTANTIATE(RV64BE);
diff --git
a/elf/elf.h b/elf/elf.h
index 4ad6ee876f..404f6be9cb 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -88,6 +88,7 @@ enum : u32 {
   SHT_X86_64_UNWIND = 0x70000001,
   SHT_ARM_EXIDX = 0x70000001,
   SHT_ARM_ATTRIBUTES = 0x70000003,
+  SHT_RISCV_ATTRIBUTES = 0x70000003,
 };
 
 enum : u32 {
@@ -186,6 +187,7 @@ enum : u32 {
   PT_GNU_RELRO = 0x6474e552,
   PT_OPENBSD_RANDOMIZE = 0x65a3dbe6,
   PT_ARM_EXIDX = 0x70000001,
+  PT_RISCV_ATTRIBUTES = 0x70000003,
 };
 
 enum : u32 {
@@ -381,6 +383,17 @@ enum : u32 {
   STO_ALPHA_STD_GPLOAD = 0x22,
 };
 
+// Tags used in .riscv.attributes sections. ELF_TAG_FILE marks a list of
+// file-wide attributes, and ELF_TAG_RISCV_* identify attributes.
+enum : u32 {
+  ELF_TAG_FILE = 1,
+  ELF_TAG_SECTION = 2,
+  ELF_TAG_SYMBOL = 3,
+  ELF_TAG_RISCV_STACK_ALIGN = 4,
+  ELF_TAG_RISCV_ARCH = 5,
+  ELF_TAG_RISCV_UNALIGNED_ACCESS = 6,
+};
+
 //
 // Relocation types
 //
diff --git a/elf/input-files.cc b/elf/input-files.cc
index 28e12c37e0..b228d27345 100644
--- a/elf/input-files.cc
+++ b/elf/input-files.cc
@@ -140,6 +140,90 @@ ObjectFile<E>::read_note_gnu_property(Context<E> &ctx, const ElfShdr<E> &shdr) {
   }
 }
 
+// Reads a null-terminated string from the beginning of `str` and removes
+// it along with its terminator from `str`. If `str` has no terminator,
+// the entire `str` is read.
+static inline std::string_view read_string(std::string_view &str) {
+  size_t pos = str.find('\0');
+  if (pos == str.npos) {
+    std::string_view val = str;
+    str = {};
+    return val;
+  }
+
+  std::string_view val = str.substr(0, pos);
+  str = str.substr(pos + 1);
+  return val;
+}
+
+// Reads a .riscv.attributes section and stores the values of the
+// attributes that we are interested in to `file.extra`.
+//
+// A section starts with a format version byte, which is followed by
+// sub-sections. A sub-section consists of a 4-byte length, a
+// null-terminated vendor name, a 1-byte tag, another 4-byte length and
+// attributes. We read only the sub-sections of the "riscv" vendor.
+template <typename E>
+static void read_riscv_attributes(Context<E> &ctx, ObjectFile<E> &file,
+                                  std::string_view data) {
+  if (data.empty())
+    Fatal(ctx) << file << ": corrupted .riscv.attributes section";
+
+  if (u8 format_version = data[0]; format_version != 'A')
+    return;
+  data = data.substr(1);
+
+  while (!data.empty()) {
+    // The length of a sub-section includes the length field itself.
+    if (data.size() < 4)
+      Fatal(ctx) << file << ": corrupted .riscv.attributes section";
+
+    i64 sz = *(U32<E> *)data.data();
+    if (sz < 4 || data.size() < (u64)sz)
+      Fatal(ctx) << file << ": corrupted .riscv.attributes section";
+
+    std::string_view p(data.data() + 4, sz - 4);
+    data = data.substr(sz);
+
+    if (!p.starts_with("riscv\0"sv))
+      continue;
+    p = p.substr(6);
+
+    if (p.size() < 5 || !p.starts_with((char)ELF_TAG_FILE))
+      Fatal(ctx) << file << ": corrupted .riscv.attributes section";
+    p = p.substr(5); // skip the tag and the sub-sub-section size
+
+    while (!p.empty()) {
+      i64 tag = read_uleb(p);
+
+      // Every attribute has a value of at least one byte.
+      if (p.empty())
+        Fatal(ctx) << file << ": corrupted .riscv.attributes section";
+
+      switch (tag) {
+      case ELF_TAG_RISCV_STACK_ALIGN:
+        file.extra.stack_align = read_uleb(p);
+        break;
+      case ELF_TAG_RISCV_ARCH:
+        file.extra.arch = read_string(p);
+        break;
+      case ELF_TAG_RISCV_UNALIGNED_ACCESS:
+        file.extra.unaligned_access = read_uleb(p);
+        break;
+      default:
+        // Skip the value of an attribute we don't know. As per the psABI,
+        // an odd-numbered tag has a string value and an even-numbered tag
+        // has an integer value.
+        if (tag % 2)
+          read_string(p);
+        else
+          read_uleb(p);
+        break;
+      }
+    }
+  }
+}
+
 template <typename E>
 static u64 read_mips_gp0(Context<E> &ctx, InputSection<E> &isec) {
   std::string_view data = isec.contents;
@@ -170,6 +254,17 @@ void ObjectFile<E>::initialize_sections(Context<E> &ctx) {
         shdr.sh_type != SHT_LLVM_ADDRSIG && !ctx.arg.relocatable)
       continue;
 
+    if constexpr (is_arm<E>)
+      if (shdr.sh_type == SHT_ARM_ATTRIBUTES)
+        continue;
+
+    if constexpr (is_riscv<E>) {
+      if (shdr.sh_type == SHT_RISCV_ATTRIBUTES) {
+        read_riscv_attributes(ctx, *this, this->get_string(ctx, shdr));
+        continue;
+      }
+    }
+
     switch (shdr.sh_type) {
     case SHT_GROUP: {
       // Get the signature of this section group.
@@ -212,7 +307,6 @@ void ObjectFile<E>::initialize_sections(Context<E> &ctx) {
     case SHT_REL:
     case SHT_RELA:
     case SHT_NULL:
-    case SHT_ARM_ATTRIBUTES:
       break;
     default: {
       std::string_view name = this->shstrtab.data() + shdr.sh_name;
diff --git a/elf/mold.h b/elf/mold.h
index ba278cb0ae..7b01e2ea05 100644
--- a/elf/mold.h
+++ b/elf/mold.h
@@ -1192,6 +1192,15 @@ class InputFile {
 
 template <typename E> struct ObjectFileExtras {};
 
+// Attribute values read from an input file's .riscv.attributes section.
+// A member has no value if the file does not have that attribute.
+template <typename E> requires is_riscv<E>
+struct ObjectFileExtras<E> {
+  std::optional<i64> stack_align;
+  std::optional<std::string_view> arch;
+  std::optional<bool> unaligned_access;
+};
+
 template <> struct ObjectFileExtras<PPC32> {
   InputSection<PPC32> *got2 = nullptr;
 };
@@ -1466,6 +1475,22 @@ void fixup_arm_exidx_section(Context<E> &ctx);
 // arch-riscv64.cc
 //
 
+// The output .riscv.attributes section. Its contents are created by
+// update_shdr() and written to the output file by copy_buf().
+template <typename E> requires is_riscv<E>
+class RiscvAttributesSection : public Chunk<E> {
+public:
+  RiscvAttributesSection() {
+    this->name = ".riscv.attributes";
+    this->shdr.sh_type = SHT_RISCV_ATTRIBUTES;
+  }
+
+  void update_shdr(Context<E> &ctx) override;
+  void copy_buf(Context<E> &ctx) override;
+
+  std::vector<u8> contents;
+};
+
 template <typename E>
 i64 riscv_resize_sections(Context<E> &ctx);
 
@@ -1647,6 +1672,11
@@ struct SectionOrder {
 // Target-specific context members
 template <typename E> struct ContextExtras {};
 
+template <typename E> requires is_riscv<E>
+struct ContextExtras<E> {
+  RiscvAttributesSection<E> *riscv_attributes = nullptr; // output section
+};
+
 template <> struct ContextExtras<PPC32> {
   Symbol<PPC32> *_SDA_BASE_ = nullptr;
 };
diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc
index 00030d3863..5895de7a90 100644
--- a/elf/output-chunks.cc
+++ b/elf/output-chunks.cc
@@ -213,11 +213,15 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
     phdr.p_flags = flags;
     phdr.p_align = std::max<u64>(min_align, chunk->shdr.sh_addralign);
     phdr.p_offset = chunk->shdr.sh_offset;
+
     if (chunk->shdr.sh_type != SHT_NOBITS)
       phdr.p_filesz = chunk->shdr.sh_size;
+
     phdr.p_vaddr = chunk->shdr.sh_addr;
     phdr.p_paddr = chunk->shdr.sh_addr;
-    phdr.p_memsz = chunk->shdr.sh_size;
+
+    if (chunk->shdr.sh_flags & SHF_ALLOC)
+      phdr.p_memsz = chunk->shdr.sh_size;
   };
 
   auto append = [&](Chunk<E> *chunk) {
@@ -349,6 +353,13 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
     if (OutputSection<E> *osec = find_section(ctx, SHT_ARM_EXIDX))
       define(PT_ARM_EXIDX, PF_R, 4, osec);
 
+  // Create a PT_RISCV_ATTRIBUTES. The section stays empty if no input
+  // file has an ISA string, in which case there is nothing to point at.
+  if constexpr (is_riscv<E>)
+    if (ctx.extra.riscv_attributes &&
+        ctx.extra.riscv_attributes->shdr.sh_size)
+      define(PT_RISCV_ATTRIBUTES, PF_R, 1, ctx.extra.riscv_attributes);
+
   // Create a PT_OPENBSD_RANDOMIZE
   for (Chunk<E> *chunk : ctx.chunks)
     if (chunk->name == ".openbsd.randomdata")
diff --git a/elf/passes.cc b/elf/passes.cc
index 8fa1ba022e..489d348219 100644
--- a/elf/passes.cc
+++ b/elf/passes.cc
@@ -112,6 +112,8 @@ void create_synthetic_sections(Context<E> &ctx) {
   ctx.note_package = push(new NotePackageSection<E>);
   ctx.note_property = push(new NotePropertySection<E>);
 
+  if constexpr (is_riscv<E>)
+    ctx.extra.riscv_attributes = push(new RiscvAttributesSection<E>);
   if constexpr (is_ppc64v1<E>)
     ctx.extra.opd = push(new PPC64OpdSection);
 
diff --git a/elf/relocatable.cc b/elf/relocatable.cc
index 22a61a473d..395db8dc40 100644
--- a/elf/relocatable.cc
+++ b/elf/relocatable.cc
@@ -54,6 +54,9 @@ static void r_create_synthetic_sections(Context<E> &ctx) {
   ctx.symtab = push(new SymtabSection<E>);
   ctx.shstrtab = push(new ShstrtabSection<E>);
   ctx.note_property = push(new NotePropertySection<E>);
+
+  if constexpr (is_riscv<E>)
+    ctx.extra.riscv_attributes = push(new RiscvAttributesSection<E>);
 }
 
 // Create SHT_GROUP (i.e. comdat group) sections. We uniquify comdat
diff --git a/test/elf/riscv64_attributes.sh b/test/elf/riscv64_attributes.sh
new file mode 100755
index 0000000000..c16ea5f506
--- /dev/null
+++ b/test/elf/riscv64_attributes.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+. $(dirname $0)/common.inc
+
+cat <