From 7005f39162b4f99a666bccfd138fba6f88ac90e2 Mon Sep 17 00:00:00 2001 From: Dana Jansens Date: Tue, 3 Dec 2024 15:43:50 -0500 Subject: [PATCH] Introduce AnyRawId as a polymorphic field type in typed instructions (#4606) The Any[...] instruction group structs have a field layout that matches the specific typed instructions that it groups together. However those specific instructions may have different field types, or different numbers of fields, making it impossible for the Any[...] instruction group to match them all at once. In this case, it can use the AnyRawId field type to represent that the specific typed instructions have different field types in that position, or may not have a field at all. Previously we used `int32_t` as this polymorphic placeholder type, which was implicit and worked somewhat magically: - It was not listed in IdKind - It was was not implemented explicitly for either FromRaw or ToRaw However it happened to work because: - FromRaw was implemented for every T, and would build as long as T was constructible from the raw id value, which is int32_t. - In As for converting a specific instruction to a group instruction: For any given id field type T in the specific type struct, it would be converted to its raw (int32_t) value, then the AnyGroupInstruction field would be constructed with FromRaw since the polymorphic field type was int32_t. - Of course int32_t is constructible from int32_t. - And if the specific instruction did not have a field in the matching position, the AnyGroupInstruction's field would be default constructed, and int32_t is default constructible. This allowed As to construct the AnyGroupInstruction type from each of its specific instruction types regardless of what field types they had. We can make this more explicit by using a type other than int32_t. We introduce the AnyRawId specifically for the purpose of being a polymorphic field type in Any[...] instruction groups. The AnyRawId type _is_ part of IdKind, removing the need for a special case in the documentation and rules about field types. And this allows us to `require` that T is in IdKind for FromRaw. - This also pointed out that FromRaw and ToRaw do not need to be implemented for BuiltinTypeKind anymore, as this is no longer a field type in any typed instructions, further simplifying the rules to not need any exceptions for what is a valid field type. The AnyRawId type participates in FromRaw by being a member of IdKind and being constructible from int32_t. The AnyRawId type is default constuctible so that when the specific typed instruction has no field in the matching position, it will be default-constructed with the InstId::InvalidIndex value. The AnyRawId type does _not_ participate in ToRaw, and this is documented on the type. This is because conversion from specific typed instruction to an instruction group is lossy (due to the polymorphism which requires the AnyRawId type). As such conversion from the instruction group to a specific instruction is not possible, and thus the Any[...] instruction group does not need to support being converted to raw id values. This is rebased on #4604. --------- Co-authored-by: Richard Smith --- toolchain/sem_ir/id_kind.h | 13 +++++++------ toolchain/sem_ir/ids.h | 19 +++++++++++++++++++ toolchain/sem_ir/inst.h | 9 +-------- toolchain/sem_ir/typed_insts.h | 6 ++---- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/toolchain/sem_ir/id_kind.h b/toolchain/sem_ir/id_kind.h index 7e079a31cf809..6c31524f2cf4b 100644 --- a/toolchain/sem_ir/id_kind.h +++ b/toolchain/sem_ir/id_kind.h @@ -123,12 +123,13 @@ class TypeEnum { using IdKind = TypeEnum< // From base/value_store.h. IntId, RealId, FloatId, StringLiteralValueId, - // From sem_ir/id.h. - InstId, AbsoluteInstId, ConstantId, EntityNameId, CompileTimeBindIndex, - RuntimeParamIndex, FacetTypeId, FunctionId, ClassId, InterfaceId, ImplId, - GenericId, SpecificId, ImportIRId, ImportIRInstId, LocId, BoolValue, - IntKind, NameId, NameScopeId, InstBlockId, StructTypeFieldsId, TypeId, - TypeBlockId, ElementIndex, LibraryNameId, FloatKind>; + // From sem_ir/ids.h. + InstId, AbsoluteInstId, AnyRawId, ConstantId, EntityNameId, + CompileTimeBindIndex, RuntimeParamIndex, FacetTypeId, FunctionId, ClassId, + InterfaceId, ImplId, GenericId, SpecificId, ImportIRId, ImportIRInstId, + LocId, BoolValue, IntKind, NameId, NameScopeId, InstBlockId, + StructTypeFieldsId, TypeId, TypeBlockId, ElementIndex, LibraryNameId, + FloatKind>; } // namespace Carbon::SemIR diff --git a/toolchain/sem_ir/ids.h b/toolchain/sem_ir/ids.h index 08f3a4c30800a..6a47e931ee0d6 100644 --- a/toolchain/sem_ir/ids.h +++ b/toolchain/sem_ir/ids.h @@ -820,6 +820,25 @@ struct LocId : public IdBase, public Printable { constexpr LocId LocId::Invalid = LocId(Parse::NodeId::Invalid); +// Polymorphic id for fields in `Any[...]` typed instruction category. Used for +// fields where the specific instruction structs have different field types in +// that position or do not have a field in that position at all. Allows +// conversion with `Inst::As<>` from the specific typed instruction to the +// `Any[...]` instruction category. +// +// This type participates in `Inst::FromRaw` in order to convert from specific +// instructions to an `Any[...]` instruction category: +// - In the case the specific instruction has a field of some `IdKind` in the +// same position, the `Any[...]` type will hold its raw value in the +// `AnyRawId` field. +// - In the case the specific instruction has no field in the same position, the +// `Any[...]` type will hold a default constructed `AnyRawId` with an invalid +// value. +struct AnyRawId : public IdBase { + constexpr AnyRawId() : IdBase(IdBase::InvalidIndex) {} + constexpr explicit AnyRawId(int32_t id) : IdBase(id) {} +}; + } // namespace Carbon::SemIR #endif // CARBON_TOOLCHAIN_SEM_IR_IDS_H_ diff --git a/toolchain/sem_ir/inst.h b/toolchain/sem_ir/inst.h index ba02d016e8ce4..41efba4b52abd 100644 --- a/toolchain/sem_ir/inst.h +++ b/toolchain/sem_ir/inst.h @@ -16,7 +16,6 @@ #include "toolchain/base/int.h" #include "toolchain/base/value_store.h" #include "toolchain/sem_ir/block_value_store.h" -#include "toolchain/sem_ir/builtin_inst_kind.h" #include "toolchain/sem_ir/id_kind.h" #include "toolchain/sem_ir/inst_kind.h" #include "toolchain/sem_ir/singleton_insts.h" @@ -278,12 +277,10 @@ class Inst : public Printable { // Convert a field to its raw representation, used as `arg0_` / `arg1_`. static constexpr auto ToRaw(IdBase base) -> int32_t { return base.index; } static constexpr auto ToRaw(IntId id) -> int32_t { return id.AsRaw(); } - static constexpr auto ToRaw(BuiltinInstKind kind) -> int32_t { - return kind.AsInt(); - } // Convert a field from its raw representation. template + requires IdKind::Contains static constexpr auto FromRaw(int32_t raw) -> T { return T(raw); } @@ -291,10 +288,6 @@ class Inst : public Printable { constexpr auto FromRaw(int32_t raw) -> IntId { return IntId::MakeRaw(raw); } - template <> - constexpr auto FromRaw(int32_t raw) -> BuiltinInstKind { - return BuiltinInstKind::FromInt(raw); - } int32_t kind_; TypeId type_id_; diff --git a/toolchain/sem_ir/typed_insts.h b/toolchain/sem_ir/typed_insts.h index c0bb7c61414ef..18b708df0fc34 100644 --- a/toolchain/sem_ir/typed_insts.h +++ b/toolchain/sem_ir/typed_insts.h @@ -25,8 +25,6 @@ // - Up to two members describing the contents of the struct. These are types // listed in the `SemIR::IdKind` type-enum, typically derived from `IdBase`. // -// TODO: There are some `int32_t` fields, which breaks the above rule, how/why? -// // The field names here matter -- the fields must have the names specified // above, when present. When converting to a `SemIR::Inst`, the `kind` and // `type_id` fields will become the kind and type associated with the @@ -82,7 +80,7 @@ struct AnyFoundationDecl { InstKind kind; InstId foundation_type_inst_id; // Kind-specific data. - int32_t arg1; + AnyRawId arg1; }; // An adapted type declaration in a class, of the form `adapt T;`. @@ -422,7 +420,7 @@ struct AnyBranch { // Branches don't produce a value, so have no type. InstBlockId target_id; // Kind-specific data. - int32_t arg1; + AnyRawId arg1; }; // Control flow to branch to the target block.