Do some tactical inlining across lexer and parser. (#4307)
These changes come from looking at our compilation benchmark and noticing
function bodies that were surprisingly not getting inlined.

Note that this will have somewhat more impact on x86, where function call
overhead (especially from pushing and popping registers) is higher than on
Arm.

On a recent AMD server, this makes parsing around 15% faster and the full
"check" phase about 5% faster.

Benchmark results:
```
name                                               old cpu/op   new cpu/op   delta
BM_CompileAPIFileDenseDecls<Phase::Lex>/256        40.2µs ± 2%  37.8µs ± 1%   -5.89%  (p=0.000 n=19+17)
BM_CompileAPIFileDenseDecls<Phase::Lex>/1024        190µs ± 2%   181µs ± 2%   -4.93%  (p=0.000 n=19+18)
BM_CompileAPIFileDenseDecls<Phase::Lex>/4096        779µs ± 1%   745µs ± 2%   -4.29%  (p=0.000 n=19+19)
BM_CompileAPIFileDenseDecls<Phase::Lex>/16384      3.44ms ± 1%  3.32ms ± 3%   -3.32%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Lex>/65536      14.6ms ± 2%  14.3ms ± 3%   -2.46%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Lex>/262144     66.7ms ± 2%  65.0ms ± 4%   -2.52%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/256      85.7µs ± 2%  71.3µs ± 2%  -16.77%  (p=0.000 n=20+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/1024      421µs ± 2%   352µs ± 2%  -16.38%  (p=0.000 n=20+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/4096     1.71ms ± 2%  1.44ms ± 2%  -15.89%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/16384    7.19ms ± 2%  6.10ms ± 2%  -15.24%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/65536    29.8ms ± 2%  25.3ms ± 2%  -14.91%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Parse>/262144    127ms ± 2%   109ms ± 2%  -14.28%  (p=0.000 n=20+20)
BM_CompileAPIFileDenseDecls<Phase::Check>/256       785µs ± 1%   752µs ± 1%   -4.13%  (p=0.000 n=20+18)
BM_CompileAPIFileDenseDecls<Phase::Check>/1024     1.71ms ± 1%  1.62ms ± 1%   -5.17%  (p=0.000 n=20+18)
BM_CompileAPIFileDenseDecls<Phase::Check>/4096     5.28ms ± 1%  4.97ms ± 1%   -6.04%  (p=0.000 n=20+19)
BM_CompileAPIFileDenseDecls<Phase::Check>/16384    20.2ms ± 1%  19.0ms ± 2%   -5.98%  (p=0.000 n=20+20)
BM_CompileAPIFileDenseDecls<Phase::Check>/65536    83.8ms ± 1%  78.9ms ± 2%   -5.84%  (p=0.000 n=19+20)
BM_CompileAPIFileDenseDecls<Phase::Check>/262144    354ms ± 1%   335ms ± 1%   -5.41%  (p=0.000 n=19+20)
```
chandlerc authored Sep 15, 2024
1 parent 580e845 commit 06344ae
Showing 4 changed files with 44 additions and 48 deletions.
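
Every file below follows the same pattern: small, hot member functions that were defined out of line in a `.cpp` file move into the header (as `inline` definitions in `tokenized_buffer.h`, or directly into the class body in `context.h`), so callers in other translation units can inline them without relying on LTO. Here is a minimal, self-contained sketch of that pattern; the `Buffer` class and its members are hypothetical illustrations, not code from this commit:

```cpp
// example_buffer.h -- hypothetical header illustrating the inlining pattern.
#ifndef EXAMPLE_BUFFER_H_
#define EXAMPLE_BUFFER_H_

#include <cstdint>
#include <vector>

class Buffer {
 public:
  // Declared here, with the definition kept at the bottom of this header
  // (marked `inline`) rather than in example_buffer.cpp, so every including
  // translation unit can inline the call.
  auto Get(int32_t i) const -> uint8_t;

  // Appends a value and returns its index.
  auto Add(uint8_t value) -> int32_t;

 private:
  std::vector<uint8_t> bytes_;
};

// Header-resident definitions: `inline` keeps these ODR-safe when the header
// is included from multiple translation units.
inline auto Buffer::Get(int32_t i) const -> uint8_t { return bytes_[i]; }

inline auto Buffer::Add(uint8_t value) -> int32_t {
  auto index = static_cast<int32_t>(bytes_.size());
  bytes_.push_back(value);
  return index;
}

#endif  // EXAMPLE_BUFFER_H_
```

Keeping such definitions in the `.cpp` file forces an out-of-line call from every other translation unit unless link-time optimization is enabled, which is exactly the call overhead the commit message calls out as more expensive on x86.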
28 changes: 0 additions & 28 deletions toolchain/lex/tokenized_buffer.cpp
@@ -20,10 +20,6 @@
 
 namespace Carbon::Lex {
 
-auto TokenizedBuffer::GetKind(TokenIndex token) const -> TokenKind {
-  return GetTokenInfo(token).kind();
-}
-
 auto TokenizedBuffer::GetLine(TokenIndex token) const -> LineIndex {
   return FindLineIndex(GetTokenInfo(token).byte_offset());
 }
@@ -159,16 +155,6 @@ auto TokenizedBuffer::GetMatchedOpeningToken(TokenIndex closing_token) const
   return closing_token_info.opening_token_index();
 }
 
-auto TokenizedBuffer::HasLeadingWhitespace(TokenIndex token) const -> bool {
-  return GetTokenInfo(token).has_leading_space();
-}
-
-auto TokenizedBuffer::HasTrailingWhitespace(TokenIndex token) const -> bool {
-  TokenIterator it(token);
-  ++it;
-  return it != tokens().end() && GetTokenInfo(*it).has_leading_space();
-}
-
 auto TokenizedBuffer::IsRecoveryToken(TokenIndex token) const -> bool {
   if (recovery_tokens_.empty()) {
     return false;
@@ -359,20 +345,6 @@ auto TokenizedBuffer::AddLine(LineInfo info) -> LineIndex {
   return LineIndex(static_cast<int>(line_infos_.size()) - 1);
 }
 
-auto TokenizedBuffer::GetTokenInfo(TokenIndex token) -> TokenInfo& {
-  return token_infos_[token.index];
-}
-
-auto TokenizedBuffer::GetTokenInfo(TokenIndex token) const -> const TokenInfo& {
-  return token_infos_[token.index];
-}
-
-auto TokenizedBuffer::AddToken(TokenInfo info) -> TokenIndex {
-  token_infos_.push_back(info);
-  expected_max_parse_tree_size_ += info.kind().expected_max_parse_tree_size();
-  return TokenIndex(static_cast<int>(token_infos_.size()) - 1);
-}
-
 auto TokenizedBuffer::CollectMemUsage(MemUsage& mem_usage,
                                       llvm::StringRef label) const -> void {
   mem_usage.Add(MemUsage::ConcatLabel(label, "allocator_"), allocator_);
32 changes: 32 additions & 0 deletions toolchain/lex/tokenized_buffer.h
@@ -477,6 +477,38 @@ using LexerDiagnosticEmitter = DiagnosticEmitter<const char*>;
 // A diagnostic emitter that uses tokens as its source of location information.
 using TokenDiagnosticEmitter = DiagnosticEmitter<TokenIndex>;
 
+inline auto TokenizedBuffer::GetKind(TokenIndex token) const -> TokenKind {
+  return GetTokenInfo(token).kind();
+}
+
+inline auto TokenizedBuffer::HasLeadingWhitespace(TokenIndex token) const
+    -> bool {
+  return GetTokenInfo(token).has_leading_space();
+}
+
+inline auto TokenizedBuffer::HasTrailingWhitespace(TokenIndex token) const
+    -> bool {
+  TokenIterator it(token);
+  ++it;
+  return it != tokens().end() && GetTokenInfo(*it).has_leading_space();
+}
+
+inline auto TokenizedBuffer::GetTokenInfo(TokenIndex token) -> TokenInfo& {
+  return token_infos_[token.index];
+}
+
+inline auto TokenizedBuffer::GetTokenInfo(TokenIndex token) const
+    -> const TokenInfo& {
+  return token_infos_[token.index];
+}
+
+inline auto TokenizedBuffer::AddToken(TokenInfo info) -> TokenIndex {
+  TokenIndex index(token_infos_.size());
+  token_infos_.push_back(info);
+  expected_max_parse_tree_size_ += info.kind().expected_max_parse_tree_size();
+  return index;
+}
+
 }  // namespace Carbon::Lex
 
 #endif  // CARBON_TOOLCHAIN_LEX_TOKENIZED_BUFFER_H_
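
One small detail in the relocated `AddToken`: the returned index is now computed from `token_infos_.size()` before the `push_back`, rather than as `size() - 1` afterwards. The two are equivalent; the new form just drops the cast and subtraction. A tiny, self-contained sketch of that equivalence, using hypothetical names rather than the toolchain's types:

```cpp
#include <cassert>
#include <vector>

// Appends `value` and returns its index, reading the size *before* push_back.
// The assert checks this matches the old `size() - 1` style computed after.
auto AddAndReturnIndex(std::vector<int>& values, int value) -> int {
  auto index = static_cast<int>(values.size());
  values.push_back(value);
  assert(index == static_cast<int>(values.size()) - 1);
  return index;
}

auto main() -> int {
  std::vector<int> values;
  assert(AddAndReturnIndex(values, 42) == 0);
  assert(AddAndReturnIndex(values, 7) == 1);
  return 0;
}
```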
17 changes: 0 additions & 17 deletions toolchain/parse/context.cpp
@@ -66,16 +66,6 @@ Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,
                tokens_->GetKind(*end_));
 }
 
-auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-    -> void {
-  tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
-}
-
-auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-    -> void {
-  tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
-}
-
 auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
                                      Lex::TokenIndex token, bool has_error)
     -> void {
@@ -143,13 +133,6 @@ auto Context::ConsumeChecked(Lex::TokenKind kind) -> Lex::TokenIndex {
   return Consume();
 }
 
-auto Context::ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::TokenIndex> {
-  if (!PositionIs(kind)) {
-    return std::nullopt;
-  }
-  return Consume();
-}
-
 auto Context::FindNextOf(std::initializer_list<Lex::TokenKind> desired_kinds)
     -> std::optional<Lex::TokenIndex> {
   auto new_position = position_;
15 changes: 12 additions & 3 deletions toolchain/parse/context.h
@@ -97,10 +97,14 @@ class Context {
 
   // Adds a node to the parse tree that has no children (a leaf).
   auto AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error = false)
-      -> void;
+      -> void {
+    tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
+  }
 
   // Adds a node to the parse tree that has children.
-  auto AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error) -> void;
+  auto AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error) -> void {
+    tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
+  }
 
   // Replaces the placeholder node at the indicated position with a leaf node.
   //
@@ -154,7 +158,12 @@
 
   // If the current position's token matches this `Kind`, returns it and
   // advances to the next position. Otherwise returns an empty optional.
-  auto ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::TokenIndex>;
+  auto ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::TokenIndex> {
+    if (!PositionIs(kind)) {
+      return std::nullopt;
+    }
+    return Consume();
+  }
 
   // Find the next token of any of the given kinds at the current bracketing
   // level.
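
For context on how an optional-returning `ConsumeIf` is typically used at call sites, here is a minimal, hypothetical analog; `MiniContext` and its token kinds are invented for illustration and are not the toolchain's real `Context` API:

```cpp
#include <cassert>
#include <optional>
#include <utility>
#include <vector>

enum class TokenKind { OpenParen, CloseParen, Identifier };

// A deliberately tiny stand-in for a parser context, just to show the
// ConsumeIf call shape.
class MiniContext {
 public:
  explicit MiniContext(std::vector<TokenKind> tokens)
      : tokens_(std::move(tokens)) {}

  // Returns true if the token at the current position has the given kind.
  auto PositionIs(TokenKind kind) const -> bool {
    return position_ < static_cast<int>(tokens_.size()) &&
           tokens_[position_] == kind;
  }

  // Consumes the current token unconditionally and returns its index.
  auto Consume() -> int { return position_++; }

  // Consumes the current token only if it has the given kind.
  auto ConsumeIf(TokenKind kind) -> std::optional<int> {
    if (!PositionIs(kind)) {
      return std::nullopt;
    }
    return Consume();
  }

 private:
  std::vector<TokenKind> tokens_;
  int position_ = 0;
};

auto main() -> int {
  MiniContext context({TokenKind::OpenParen, TokenKind::Identifier});
  // Typical call shape: branch on the optional and use the consumed index.
  if (auto open = context.ConsumeIf(TokenKind::OpenParen)) {
    assert(*open == 0);
  }
  assert(!context.ConsumeIf(TokenKind::CloseParen).has_value());
  return 0;
}
```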
