From 049689590ab3956a12f16903639a8f92a7d77e53 Mon Sep 17 00:00:00 2001 From: danakj Date: Fri, 29 Nov 2024 11:31:57 -0500 Subject: [PATCH] Introduce AnyRawId as a polymorphic field type in typed instructions The Any[...] instruction group structs have a field layout that matches the specific typed instructions that it groups together. However those specific instructions may have different field types, or different numbers of fields, making it impossible for the Any[...] instruction group to match them all at once. In this case, it can use the AnyRawId field type to represent that the specific typed instructions have different field types in that position, or may not have a field at all. Previously we used `int32_t` as this polymorphic placeholder type, which was implicit and worked somewhat magically: - It was not listed in IdKind - It was was not implemented explicitly for either FromRaw or ToRaw However it happened to work because: - FromRaw was implemented for every T, and would build as long as T was constructible from the raw id value, which is int32_t. - In As for converting a specific instruction to a group instruction: For any given id field type T in the specific type struct, it would be converted to its raw (int32_t) value, then the AnyGroupInstruction field would be constructed with FromRaw since the polymorphic field type was int32_t. - Of course int32_t is constructible from int32_t. - And if the specific instruction did not have a field in the matching position, the AnyGroupInstruction's field would be default constructed, and int32_t is default constructible. This allowed As to construct the AnyGroupInstruction type from each of its specific instruction types regardless of what field types they had. We can make this more explicit by using a type other than int32_t. We introduce the AnyRawId specifically for the purpose of being a polymorphic field type in Any[...] instruction groups. The AnyRawId type _is_ part of IdKind, removing the need for a special case in the documentation and rules about field types. And this allows us to `require` that T is in IdKind for FromRaw. - This also pointed out that FromRaw and ToRaw do not need to be implemented for BuiltinTypeKind anymore, as this is no longer a field type in any typed instructions, further simplifying the rules to not need any exceptions for what is a valid field type. The AnyRawId type participates in FromRaw by being a member of IdKind and being constructible from int32_t. The AnyRawId type is default constuctible so that when the specific typed instruction has no field in the matching position, it will be default-constructed with the InstId::InvalidIndex value. The AnyRawId type does _not_ participate in ToRaw, and this is documented on the type. This is because conversion from specific typed instruction to an instruction group is lossy (due to the polymorphism which requires the AnyRawId type). As such conversion from the instruction group to a specific instruction is not possible, and thus the Any[...] instruction group does not need to support being converted to raw id values. This is rebased on #4604. --- toolchain/sem_ir/id_kind.h | 13 +++++++------ toolchain/sem_ir/ids.h | 19 +++++++++++++++++++ toolchain/sem_ir/inst.h | 9 +-------- toolchain/sem_ir/typed_insts.h | 6 ++---- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/toolchain/sem_ir/id_kind.h b/toolchain/sem_ir/id_kind.h index 7e079a31cf809..6c31524f2cf4b 100644 --- a/toolchain/sem_ir/id_kind.h +++ b/toolchain/sem_ir/id_kind.h @@ -123,12 +123,13 @@ class TypeEnum { using IdKind = TypeEnum< // From base/value_store.h. IntId, RealId, FloatId, StringLiteralValueId, - // From sem_ir/id.h. - InstId, AbsoluteInstId, ConstantId, EntityNameId, CompileTimeBindIndex, - RuntimeParamIndex, FacetTypeId, FunctionId, ClassId, InterfaceId, ImplId, - GenericId, SpecificId, ImportIRId, ImportIRInstId, LocId, BoolValue, - IntKind, NameId, NameScopeId, InstBlockId, StructTypeFieldsId, TypeId, - TypeBlockId, ElementIndex, LibraryNameId, FloatKind>; + // From sem_ir/ids.h. + InstId, AbsoluteInstId, AnyRawId, ConstantId, EntityNameId, + CompileTimeBindIndex, RuntimeParamIndex, FacetTypeId, FunctionId, ClassId, + InterfaceId, ImplId, GenericId, SpecificId, ImportIRId, ImportIRInstId, + LocId, BoolValue, IntKind, NameId, NameScopeId, InstBlockId, + StructTypeFieldsId, TypeId, TypeBlockId, ElementIndex, LibraryNameId, + FloatKind>; } // namespace Carbon::SemIR diff --git a/toolchain/sem_ir/ids.h b/toolchain/sem_ir/ids.h index c0e25d15ae2c1..3bfebf5356abc 100644 --- a/toolchain/sem_ir/ids.h +++ b/toolchain/sem_ir/ids.h @@ -957,6 +957,25 @@ struct LocId : public IdBase, public Printable { constexpr LocId LocId::Invalid = LocId(Parse::NodeId::Invalid); +// Polymorphic id for `Any[...]` typed instructions. Used for fields where the +// specific instruction structs have different field types in that position or +// do not have a field in that position at all. Allows conversion with +// `Inst::As<>` from the specific typed instruction to the `Any[...]` +// instruction group. +// +// This participates in `Inst::FromRaw` in order to convert from specific +// instructions, but does not participate in `Inst::ToRaw` as it's not possible +// to convert in the other direction. +// +// - In the case the specific instruction has a field of some `IdKind` in the +// same position, the `Any[...]` type will hold its raw value in the +// `AnyRawId` field. +// - In the case the specific instruction has no field in the same position, the +// `Any[...]` type will hold a default constructed `AnyRawId`. +struct AnyRawId { + int32_t raw_id = InstId::InvalidIndex; +}; + } // namespace Carbon::SemIR #endif // CARBON_TOOLCHAIN_SEM_IR_IDS_H_ diff --git a/toolchain/sem_ir/inst.h b/toolchain/sem_ir/inst.h index f2f3554b8979f..e07f06bcdc01d 100644 --- a/toolchain/sem_ir/inst.h +++ b/toolchain/sem_ir/inst.h @@ -16,7 +16,6 @@ #include "toolchain/base/int.h" #include "toolchain/base/value_store.h" #include "toolchain/sem_ir/block_value_store.h" -#include "toolchain/sem_ir/builtin_inst_kind.h" #include "toolchain/sem_ir/id_kind.h" #include "toolchain/sem_ir/inst_kind.h" #include "toolchain/sem_ir/typed_insts.h" @@ -267,12 +266,10 @@ class Inst : public Printable { // Convert a field to its raw representation, used as `arg0_` / `arg1_`. static constexpr auto ToRaw(IdBase base) -> int32_t { return base.index; } static constexpr auto ToRaw(IntId id) -> int32_t { return id.AsRaw(); } - static constexpr auto ToRaw(BuiltinInstKind kind) -> int32_t { - return kind.AsInt(); - } // Convert a field from its raw representation. template + requires IdKind::Contains static constexpr auto FromRaw(int32_t raw) -> T { return T(raw); } @@ -280,10 +277,6 @@ class Inst : public Printable { constexpr auto FromRaw(int32_t raw) -> IntId { return IntId::MakeRaw(raw); } - template <> - constexpr auto FromRaw(int32_t raw) -> BuiltinInstKind { - return BuiltinInstKind::FromInt(raw); - } int32_t kind_; TypeId type_id_; diff --git a/toolchain/sem_ir/typed_insts.h b/toolchain/sem_ir/typed_insts.h index 73469dd6e0341..59897501735ba 100644 --- a/toolchain/sem_ir/typed_insts.h +++ b/toolchain/sem_ir/typed_insts.h @@ -23,8 +23,6 @@ // - Up to two members describing the contents of the struct. These are types // listed in the `SemIR::IdKind` type-enum, typically derived from `IdBase`. // -// TODO: There are some `int32_t` fields, which breaks the above rule, how/why? -// // The field names here matter -- the fields must have the names specified // above, when present. When converting to a `SemIR::Inst`, the `kind` and // `type_id` fields will become the kind and type associated with the @@ -83,7 +81,7 @@ struct AnyFoundationDecl { InstKind kind; InstId foundation_type_inst_id; // Kind-specific data. - int32_t arg1; + AnyRawId arg1; }; // An adapted type declaration in a class, of the form `adapt T;`. @@ -423,7 +421,7 @@ struct AnyBranch { // Branches don't produce a value, so have no type. InstBlockId target_id; // Kind-specific data. - int32_t arg1; + AnyRawId arg1; }; // Control flow to branch to the target block.