From ca46f48f83e12a801d4e7cb0bba05879f94db9f5 Mon Sep 17 00:00:00 2001 From: Jason Steving Date: Thu, 16 Nov 2023 15:31:43 -0800 Subject: [PATCH] Update Claro's Dep Module Monomorphization Codegen Strategy This addresses an extremely subtle bug in the previous approach. To summarize, previously dep module monomorphization codegen was placed in an "extra" top-level class, always named $DepModuleMonomorphization. This was intended to ensure that transitive calls from dep module generic procs would naturally produce codegen that was applicable in whichever file it's codegen appeared in. This had the unintended side-effect, however, of having the class generated by different modules conflicting. The underlying java_libary() and java_binary() rule implementations ended up "deduplicating" the $DepModuleMonomorphization classes and while compilation would succeed, interestingly, at runtime the deploy Jar's $DepModuleMonomorphization class would just include some (not quite) random subset of the programs overall monomorphizations, and so there would actually be surprising java.lang.NoSuchFieldError's thrown at runtime when trying to call those procedures. Very interesting! But very problematic. This new approach leverages knowledge of this esoteric behavior to solve the runtime error issues. Now, monomorphizations are no longer wrapped in an intermediary $DepModuleMonomorphizations class, and instead each monomorphization is generated inside its own custom class named `$MONOMORPHIZATION$$`. This results in the final deploy jar containing a single class for each monomorphization needed in the entire program actually leveraging this Jar level class deduplication for my advantage such that each compilation unit can just codegen as necessary and the final Jar won't have any duplicates. The following CL(s) will further leverage (abuse) this knowledge to actually start including the set of monomorphizations produced by the dependency sub-graph beneath each .claro_module so that any of its deps can actually completely skip codegen on any monomorphizations that any of it's direct or transitive dependencies have already generated. --- WORKSPACE | 4 +- .../buggy_buggies/data_structures/heap.claro | 2 +- .../dep_module_monomorphization_test/BUILD | 52 +++++++++++++++++++ .../MidA.claro | 5 ++ .../MidA.claro_module_api | 2 + .../MidB.claro | 5 ++ .../MidB.claro_module_api | 2 + .../bottom.claro | 4 ++ .../bottom.claro_module_api | 2 + .../top.claro | 4 ++ non_module_deps.bzl | 4 +- .../ProgramNode.java | 34 +++++++++--- .../functions/FunctionCallExpr.java | 25 +++++---- .../functions/ProviderFunctionCallExpr.java | 25 +++++---- .../statements/ConsumerFunctionCallStmt.java | 25 +++++---- 15 files changed, 152 insertions(+), 43 deletions(-) create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/BUILD create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro_module_api create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro_module_api create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro_module_api create mode 100644 examples/claro_programs/module_system/dep_module_monomorphization_test/top.claro diff --git a/WORKSPACE b/WORKSPACE index 4900bb2b..cfca5ebb 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -49,8 +49,8 @@ http_file( # In some way, it'd be nicer to make use of https://github.com/JasonSteving99/claro-lang/releases/latest/download/.. # instead of naming the release explicitly. However, this would make it impossible to cherrypick an old version and # rebuild without manual work. - sha256 = "df4dcd0f384c14e3deab02d85b835321fe850156a92f597c7f9d81fbeac20931", - url = "https://github.com/JasonSteving99/claro-lang/releases/download/v0.1.334/claro-cli-install.tar.gz", + sha256 = "47fe27aca709276e403f6107c5327a26b9640627db4f0d815c8a4160d8995617", + url = "https://github.com/JasonSteving99/claro-lang/releases/download/v0.1.335/claro-cli-install.tar.gz", ) # ClaroDocs is built atop Google's Closure Templates in order to ensure that I'm not generating unsafe html since the diff --git a/examples/claro_programs/demo_server/buggy_buggies/data_structures/heap.claro b/examples/claro_programs/demo_server/buggy_buggies/data_structures/heap.claro index 3476b936..b87cb364 100644 --- a/examples/claro_programs/demo_server/buggy_buggies/data_structures/heap.claro +++ b/examples/claro_programs/demo_server/buggy_buggies/data_structures/heap.claro @@ -31,7 +31,7 @@ unwrappers Heap { consumer insert(heap: Heap, value: struct {dist: int, pos: Pos::Position}) { - UNFORTUNATE_DEFAULT_IMPL_OF_append_THAT_WILL_BE_REMOVED_SOON(unwrap(heap), value); + lists::add(unwrap(heap), value); heapify_up(heap, len(unwrap(heap)) - 1); } diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/BUILD b/examples/claro_programs/module_system/dep_module_monomorphization_test/BUILD new file mode 100644 index 00000000..06d479d0 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/BUILD @@ -0,0 +1,52 @@ +load("//:rules.bzl", "claro_module", "claro_binary") + +# TODO(steving) TESTING!!! Currently due to the way that Bazel has implemented the builtin java_library() and +# TODO(steving) java_binary() rules, Claro's use of a common "extra" top-level `$DepModuleMonomorphizations` +# TODO(steving) non-public class in every module that calls generic procedures from its deps is broken as +# TODO(steving) the *_deploy.jar finally produced by the top-level java_binary() will actually dedupe the +# TODO(steving) `$DepModuleMonomorphizations` class from each of the java_library()'s below it and only accept +# TODO(steving) the last one generated. +# TODO(steving) While this is currently causing a bug in the functionality of monomorphization, this actually +# TODO(steving) represents a big code-size optimization opportunity. If each .claro_module can encode the complete set +# TODO(steving) of monomorphizations that it generated in its $DepModuleMonomorphization class (which is still +# TODO(steving) necessary for it's corresponding java_library() target to successfully build), then all of that +# TODO(steving) codegen can be blindly copied into any consuming Claro targets that depend on it. This will have a +# TODO(steving) two-fold optimization improvement. First, it will potentially allow downstream consumer targets to +# TODO(steving) actually completely skip dep module monomorphizations that it happened to already recieve from its +# TODO(steving) deps which should have the potential to improve compile times! Second, it will result in all of the +# TODO(steving) duplicated $DepModuleMonomorphization classes generated below the root claro_binary() node to get +# TODO(steving) completely overwritten by the single class at the root of the dep graph that will have accumulated ALL +# TODO(steving) of the monomorphizations necessary for the entire program! +claro_binary( + name = "top", + main_file = "top.claro", + deps = { + "Bottom": ":bottom", + "MidA": ":MidA", + "MidB": ":MidB", + } +) + +claro_module( + name = "MidA", + module_api_file = "MidA.claro_module_api", + srcs = ["MidA.claro"], + deps = { + "Bottom": ":bottom", + }, +) + +claro_module( + name = "MidB", + module_api_file = "MidB.claro_module_api", + srcs = ["MidB.claro"], + deps = { + "Bottom": ":bottom", + }, +) + +claro_module( + name = "bottom", + module_api_file = "bottom.claro_module_api", + srcs = ["bottom.claro"], +) \ No newline at end of file diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro new file mode 100644 index 00000000..1cd98180 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro @@ -0,0 +1,5 @@ + +function asList(a: string, b: int) -> [oneof] { + return Bottom::asList(a,b); +} +print(Bottom::asList(1, "one")); diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro_module_api b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro_module_api new file mode 100644 index 00000000..01efa31d --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidA.claro_module_api @@ -0,0 +1,2 @@ + +function asList(a: string, b: int) -> [oneof]; \ No newline at end of file diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro new file mode 100644 index 00000000..0678e220 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro @@ -0,0 +1,5 @@ + +function asList(a: string, b: float) -> [oneof] { + return Bottom::asList(a,b); +} +print(Bottom::asList(1, "one")); diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro_module_api b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro_module_api new file mode 100644 index 00000000..34a96398 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/MidB.claro_module_api @@ -0,0 +1,2 @@ + +function asList(a: string, b: float) -> [oneof]; \ No newline at end of file diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro b/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro new file mode 100644 index 00000000..3dd55812 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro @@ -0,0 +1,4 @@ + +function asList(a: A, b: B) -> [oneof] { + return [a, b]; +} diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro_module_api b/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro_module_api new file mode 100644 index 00000000..c44b40c4 --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/bottom.claro_module_api @@ -0,0 +1,2 @@ + +function asList(a: A, b: B) -> [oneof]; \ No newline at end of file diff --git a/examples/claro_programs/module_system/dep_module_monomorphization_test/top.claro b/examples/claro_programs/module_system/dep_module_monomorphization_test/top.claro new file mode 100644 index 00000000..d8790c9a --- /dev/null +++ b/examples/claro_programs/module_system/dep_module_monomorphization_test/top.claro @@ -0,0 +1,4 @@ + +print(Bottom::asList(1, "one")); +print(MidA::asList("two", 2)); +print(MidB::asList("two", 2.0)); diff --git a/non_module_deps.bzl b/non_module_deps.bzl index 5c6368cd..ee623ceb 100644 --- a/non_module_deps.bzl +++ b/non_module_deps.bzl @@ -20,8 +20,8 @@ def _non_module_deps_impl(ctx): ) http_file( name = "bootstrapping_claro_compiler_tarfile", - sha256 = "df4dcd0f384c14e3deab02d85b835321fe850156a92f597c7f9d81fbeac20931", - url = "https://github.com/JasonSteving99/claro-lang/releases/download/v0.1.334/claro-cli-install.tar.gz", + sha256 = "47fe27aca709276e403f6107c5327a26b9640627db4f0d815c8a4160d8995617", + url = "https://github.com/JasonSteving99/claro-lang/releases/download/v0.1.335/claro-cli-install.tar.gz", ) # ClaroDocs is built atop Google's Closure Templates in order to ensure that I'm not generating unsafe html since the # intention is for users to be able to trust and host ClaroDocs themselves (particularly relevant since ClaroDocs diff --git a/src/java/com/claro/intermediate_representation/ProgramNode.java b/src/java/com/claro/intermediate_representation/ProgramNode.java index 845754c7..7644f974 100644 --- a/src/java/com/claro/intermediate_representation/ProgramNode.java +++ b/src/java/com/claro/intermediate_representation/ProgramNode.java @@ -8,6 +8,7 @@ import com.claro.intermediate_representation.statements.*; import com.claro.intermediate_representation.statements.contracts.ContractDefinitionStmt; import com.claro.intermediate_representation.statements.contracts.ContractImplementationStmt; +import com.claro.intermediate_representation.statements.contracts.ContractProcedureImplementationStmt; import com.claro.intermediate_representation.statements.user_defined_type_def_stmts.*; import com.claro.intermediate_representation.types.BaseType; import com.claro.intermediate_representation.types.ClaroTypeException; @@ -16,6 +17,7 @@ import com.claro.internal_static_state.InternalStaticStateUtil; import com.claro.module_system.module_serialization.proto.SerializedClaroModule; import com.google.common.collect.*; +import com.google.common.hash.Hashing; import java.util.*; import java.util.function.Consumer; @@ -275,17 +277,33 @@ public StringBuilder generateJavaSourceOutput(ScopedHeap scopedHeap) { depModuleMonomorphization.getValue() ); } - res.append("\n// Dep Module Monomorphizations Generated Below:\n") - .append("final class $DepModuleMonomorphizations {\n"); + res.append("\n// Dep Module Monomorphizations Generated Below:\n"); for (String depModule : MonomorphizationCoordinator.monomorphizationsByModuleAndRequestCache.rowKeySet()) { - res.append("static final class $").append(depModule).append(" {\n"); - for (String monomorphization : MonomorphizationCoordinator.monomorphizationsByModuleAndRequestCache.row(depModule) - .values()) { - res.append(monomorphization).append("\n"); + for (Map.Entry entry + : MonomorphizationCoordinator.monomorphizationsByModuleAndRequestCache.row(depModule).entrySet()) { + IPCMessages.MonomorphizationRequest monomorphizationRequest = entry.getKey(); + String monomorphizationCodegen = entry.getValue(); + String genProcName = + String.format( + "%s__%s", + monomorphizationRequest.getProcedureName(), + Hashing.sha256().hashUnencodedChars( + ContractProcedureImplementationStmt.getCanonicalProcedureName( + "$MONOMORPHIZATION", + monomorphizationRequest.getConcreteTypeParamsList().stream() + .map(Types::parseTypeProto).collect(ImmutableList.toImmutableList()), + monomorphizationRequest.getProcedureName() + )) + ); + res.append("final class $MONOMORPHIZATION$") + .append(depModule) + .append('$') + .append(genProcName) + .append(" {\n"); + res.append(monomorphizationCodegen).append("\n"); + res.append("\n}\n"); } - res.append("\n}\n"); } - res.append("}\n"); // Cleanup any threads or subprocesses that got started up by monomorphization. MonomorphizationCoordinator.shutdownDepModuleMonomorphization(); diff --git a/src/java/com/claro/intermediate_representation/expressions/procedures/functions/FunctionCallExpr.java b/src/java/com/claro/intermediate_representation/expressions/procedures/functions/FunctionCallExpr.java index b00e055b..abfbb906 100644 --- a/src/java/com/claro/intermediate_representation/expressions/procedures/functions/FunctionCallExpr.java +++ b/src/java/com/claro/intermediate_representation/expressions/procedures/functions/FunctionCallExpr.java @@ -718,12 +718,12 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { depModDescriptor.getUniqueModuleName() ); }); + Optional hashedName = Optional.empty(); if (!this.name.contains("$MONOMORPHIZATION")) { scopedHeap.markIdentifierUsed(this.name); } else { this.hashNameForCodegen = true; if (this.optionalOriginatingDepModuleName.isPresent()) { - // TODO(steving) TESTING!!! This needs a complete overhaul to correctly point to the codegen in the local file. // Additionally, b/c this is a call to a monomorphization, if the procedure is actually located in a dep module, // that means that the monomorphization can't be guaranteed to *actually* have been generated in the dep // module's codegen. This is b/c all Claro Modules are compiled in isolation - meaning they don't know how @@ -733,9 +733,9 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { optionalNormalizedOriginatingDepModulePrefix = Optional.of( String.format( - "%s.$%s.", - "$DepModuleMonomorphizations", - ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())) + "$MONOMORPHIZATION$%s$%s.", + ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())), + (hashedName = Optional.of(getHashedName())).get() )); } } @@ -744,12 +744,7 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { // In order to call the actual monomorphization, we need to ensure that the name isn't too long for Java. // So, we're following a hack where all monomorphization names are sha256 hashed to keep them short while // still unique. - this.name = - String.format( - "%s__%s", - this.originalName, - Hashing.sha256().hashUnencodedChars(this.name).toString() - ); + this.name = hashedName.orElseGet(this::getHashedName); } AtomicReference exprsGenJavaSource = @@ -832,6 +827,16 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { return functionCallJavaSourceBody.createMerged(exprsGenJavaSource.get()); } + private String getHashedName() { + return String.format( + "%s__%s", + this.optionalOriginatingDepModuleName + .map(depMod -> this.originalName.replace(String.format("$DEP_MODULE$%s$", depMod), "")) + .orElse(this.originalName), + Hashing.sha256().hashUnencodedChars(this.name).toString() + ); + } + @Override public Object generateInterpretedOutput(ScopedHeap scopedHeap) { return ((Types.ProcedureType.ProcedureWrapper) scopedHeap.getIdentifierValue(this.name)) diff --git a/src/java/com/claro/intermediate_representation/expressions/procedures/functions/ProviderFunctionCallExpr.java b/src/java/com/claro/intermediate_representation/expressions/procedures/functions/ProviderFunctionCallExpr.java index 839ed569..310f9201 100644 --- a/src/java/com/claro/intermediate_representation/expressions/procedures/functions/ProviderFunctionCallExpr.java +++ b/src/java/com/claro/intermediate_representation/expressions/procedures/functions/ProviderFunctionCallExpr.java @@ -182,12 +182,12 @@ public StringBuilder generateJavaSourceBodyOutput(ScopedHeap scopedHeap) { depModDescriptor.getUniqueModuleName() ); }); + Optional hashedName = Optional.empty(); if (!this.functionName.contains("$MONOMORPHIZATION")) { scopedHeap.markIdentifierUsed(this.functionName); } else { this.hashNameForCodegen = true; if (this.optionalOriginatingDepModuleName.isPresent()) { - // TODO(steving) TESTING!!! This needs a complete overhaul to correctly point to the codegen in the local file. // Additionally, b/c this is a call to a monomorphization, if the procedure is actually located in a dep module, // that means that the monomorphization can't be guaranteed to *actually* have been generated in the dep // module's codegen. This is b/c all Claro Modules are compiled in isolation - meaning they don't know how @@ -197,9 +197,9 @@ public StringBuilder generateJavaSourceBodyOutput(ScopedHeap scopedHeap) { optionalNormalizedOriginatingDepModulePrefix = Optional.of( String.format( - "%s.$%s.", - "$DepModuleMonomorphizations", - ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())) + "$MONOMORPHIZATION$%s$%s.", + ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())), + (hashedName = Optional.of(getHashedName())).get() )); } } @@ -208,12 +208,7 @@ public StringBuilder generateJavaSourceBodyOutput(ScopedHeap scopedHeap) { // In order to call the actual monomorphization, we need to ensure that the name isn't too long for Java. // So, we're following a hack where all monomorphization names are sha256 hashed to keep them short while // still unique. - this.functionName = - String.format( - "%s__%s", - this.originalName, - Hashing.sha256().hashUnencodedChars(this.functionName).toString() - ); + this.functionName = hashedName.orElseGet(this::getHashedName); } StringBuilder res; @@ -242,6 +237,16 @@ public StringBuilder generateJavaSourceBodyOutput(ScopedHeap scopedHeap) { return res; } + private String getHashedName() { + return String.format( + "%s__%s", + this.optionalOriginatingDepModuleName + .map(depMod -> this.originalName.replace(String.format("$DEP_MODULE$%s$", depMod), "")) + .orElse(this.originalName), + Hashing.sha256().hashUnencodedChars(this.functionName).toString() + ); + } + @Override public Object generateInterpretedOutput(ScopedHeap scopedHeap) { return ((Types.ProcedureType.ProcedureWrapper) scopedHeap.getIdentifierValue(this.functionName)) diff --git a/src/java/com/claro/intermediate_representation/statements/ConsumerFunctionCallStmt.java b/src/java/com/claro/intermediate_representation/statements/ConsumerFunctionCallStmt.java index 7dd8d852..89e7fb76 100644 --- a/src/java/com/claro/intermediate_representation/statements/ConsumerFunctionCallStmt.java +++ b/src/java/com/claro/intermediate_representation/statements/ConsumerFunctionCallStmt.java @@ -255,12 +255,12 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { depModDescriptor.getUniqueModuleName() ); }); + Optional hashedName = Optional.empty(); if (!this.consumerName.contains("$MONOMORPHIZATION")) { scopedHeap.markIdentifierUsed(this.consumerName); } else { this.hashNameForCodegen = true; if (this.optionalOriginatingDepModuleName.isPresent()) { - // TODO(steving) TESTING!!! This needs a complete overhaul to correctly point to the codegen in the local file. // Additionally, b/c this is a call to a monomorphization, if the procedure is actually located in a dep module, // that means that the monomorphization can't be guaranteed to *actually* have been generated in the dep // module's codegen. This is b/c all Claro Modules are compiled in isolation - meaning they don't know how @@ -270,9 +270,9 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { optionalNormalizedOriginatingDepModulePrefix = Optional.of( String.format( - "%s.$%s.", - "$DepModuleMonomorphizations", - ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())) + "$MONOMORPHIZATION$%s$%s.", + ScopedHeap.getDefiningModuleDisambiguator(Optional.of(this.optionalOriginatingDepModuleName.get())), + (hashedName = Optional.of(getHashedName())).get() )); } } @@ -281,12 +281,7 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { // In order to call the actual monomorphization, we need to ensure that the name isn't too long for Java. // So, we're following a hack where all monomorphization names are sha256 hashed to keep them short while // still unique. - this.consumerName = - String.format( - "%s__%s", - this.originalName, - Hashing.sha256().hashUnencodedChars(this.consumerName).toString() - ); + this.consumerName = hashedName.orElseGet(this::getHashedName); } AtomicReference argValsGenJavaSource = @@ -343,6 +338,16 @@ public GeneratedJavaSource generateJavaSourceOutput(ScopedHeap scopedHeap) { return consumerFnGenJavaSource.createMerged(argValsGenJavaSource.get()); } + private String getHashedName() { + return String.format( + "%s__%s", + this.optionalOriginatingDepModuleName + .map(depMod -> this.originalName.replace(String.format("$DEP_MODULE$%s$", depMod), "")) + .orElse(this.originalName), + Hashing.sha256().hashUnencodedChars(this.consumerName).toString() + ); + } + @Override public Object generateInterpretedOutput(ScopedHeap scopedHeap) { return ((Types.ProcedureType.ProcedureWrapper) scopedHeap.getIdentifierValue(this.consumerName))