Skip to content
This repository was archived by the owner on Aug 31, 2023. It is now read-only.

Commit ce39e3f

Browse files
committed
feat(rome_service): recycle the node cache across parsing sessions
1 parent b6e7a39 commit ce39e3f

File tree

11 files changed

+160
-38
lines changed

11 files changed

+160
-38
lines changed

crates/rome_js_parser/src/parse.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use rome_parser::event::Event;
1010
use rome_parser::token_source::Trivia;
1111
use rome_parser::AnyParse;
1212

13-
use rome_rowan::AstNode;
13+
use rome_rowan::{AstNode, NodeCache};
1414
use std::marker::PhantomData;
1515

1616
/// A utility struct for managing the result of a parser job
@@ -199,9 +199,20 @@ pub fn parse_module(text: &str, file_id: FileId) -> Parse<JsModule> {
199199

200200
/// Parses the provided string as a EcmaScript program using the provided syntax features.
201201
pub fn parse(text: &str, file_id: FileId, source_type: SourceType) -> Parse<AnyJsRoot> {
202+
let mut cache = NodeCache::default();
203+
parse_with_cache(text, file_id, source_type, &mut cache)
204+
}
205+
206+
/// Parses the provided string as a EcmaScript program using the provided syntax features and node cache.
207+
pub fn parse_with_cache(
208+
text: &str,
209+
file_id: FileId,
210+
source_type: SourceType,
211+
cache: &mut NodeCache,
212+
) -> Parse<AnyJsRoot> {
202213
tracing::debug_span!("parse", file_id = ?file_id).in_scope(move || {
203214
let (events, errors, tokens) = parse_common(text, file_id, source_type);
204-
let mut tree_sink = JsLosslessTreeSink::new(text, &tokens);
215+
let mut tree_sink = JsLosslessTreeSink::with_cache(text, &tokens, cache);
205216
rome_parser::event::process(&mut tree_sink, events, errors);
206217
let (green, parse_errors) = tree_sink.finish();
207218
Parse::new(green, parse_errors)

crates/rome_json_parser/src/lib.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use rome_json_syntax::{JsonLanguage, JsonRoot, JsonSyntaxNode};
88
pub use rome_parser::prelude::*;
99
use rome_parser::tree_sink::LosslessTreeSink;
1010
use rome_parser::AnyParse;
11-
use rome_rowan::AstNode;
11+
use rome_rowan::{AstNode, NodeCache};
1212

1313
mod lexer;
1414
mod parser;
@@ -20,14 +20,19 @@ pub(crate) type JsonLosslessTreeSink<'source> =
2020
LosslessTreeSink<'source, JsonLanguage, JsonSyntaxFactory>;
2121

2222
pub fn parse_json(source: &str, file_id: FileId) -> JsonParse {
23+
let mut cache = NodeCache::default();
24+
parse_json_with_cache(source, file_id, &mut cache)
25+
}
26+
27+
pub fn parse_json_with_cache(source: &str, file_id: FileId, cache: &mut NodeCache) -> JsonParse {
2328
tracing::debug_span!("parse", file_id = ?file_id).in_scope(move || {
2429
let mut parser = JsonParser::new(source, file_id);
2530

2631
parse_root(&mut parser);
2732

2833
let (events, diagnostics, trivia) = parser.finish();
2934

30-
let mut tree_sink = JsonLosslessTreeSink::new(source, &trivia);
35+
let mut tree_sink = JsonLosslessTreeSink::with_cache(source, &trivia, cache);
3136
rome_parser::event::process(&mut tree_sink, events, diagnostics);
3237
let (green, diagnostics) = tree_sink.finish();
3338

crates/rome_parser/src/tree_sink.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
use crate::prelude::*;
22
use crate::token_source::Trivia;
33
use rome_rowan::{
4-
Language, SyntaxFactory, SyntaxKind, SyntaxNode, TextRange, TextSize, TreeBuilder, TriviaPiece,
4+
Language, NodeCache, SyntaxFactory, SyntaxKind, SyntaxNode, TextRange, TextSize, TreeBuilder,
5+
TriviaPiece,
56
};
67

78
/// An abstraction for syntax tree implementations
@@ -37,7 +38,7 @@ where
3738
trivia_pos: usize,
3839
parents_count: usize,
3940
errors: Vec<ParseDiagnostic>,
40-
inner: TreeBuilder<'static, L, Factory>,
41+
inner: TreeBuilder<'a, L, Factory>,
4142
/// Signal that the sink must generate an EOF token when its finishing. See [LosslessTreeSink::finish] for more details.
4243
needs_eof: bool,
4344
trivia_pieces: Vec<TriviaPiece>,
@@ -93,6 +94,20 @@ where
9394
}
9495
}
9596

97+
pub fn with_cache(text: &'a str, trivia: &'a [Trivia], cache: &'a mut NodeCache) -> Self {
98+
Self {
99+
text,
100+
trivia_list: trivia,
101+
text_pos: 0.into(),
102+
trivia_pos: 0,
103+
parents_count: 0,
104+
inner: TreeBuilder::with_cache(cache),
105+
errors: vec![],
106+
needs_eof: true,
107+
trivia_pieces: Vec::with_capacity(128),
108+
}
109+
}
110+
96111
/// Finishes the tree and return the root node with possible parser errors.
97112
///
98113
/// If tree is finished without a [rome_rowan::SyntaxKind::EOF], one will be generated and all pending trivia

crates/rome_rowan/src/green.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod trivia;
77
pub(crate) use self::{
88
element::{GreenElement, GreenElementRef},
99
node::{Child, Children, GreenNode, GreenNodeData, Slot},
10+
node_cache::LiveSet,
1011
token::{GreenToken, GreenTokenData},
1112
trivia::GreenTrivia,
1213
};

crates/rome_rowan/src/green/node_cache.rs

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use hashbrown::hash_map::{RawEntryMut, RawOccupiedEntryMut, RawVacantEntryMut};
22
use rome_text_size::TextSize;
3-
use rustc_hash::FxHasher;
3+
use rustc_hash::{FxHashSet, FxHasher};
44
use std::hash::{BuildHasherDefault, Hash, Hasher};
55

66
use crate::green::Slot;
@@ -80,6 +80,16 @@ fn element_id(elem: GreenElementRef<'_>) -> *const () {
8080
}
8181
}
8282

83+
/// The [LiveSet] tracks which nodes and tokens are reachable in the most
84+
/// recent revision of the syntax tree. It is filled as the syntax tree gets
85+
/// rebuilt, and is used to evit unused entries from the cache at the end of a
86+
/// parsing session.
87+
#[derive(Default, Debug)]
88+
pub(crate) struct LiveSet {
89+
nodes: FxHashSet<*const GreenNodeData>,
90+
tokens: FxHashSet<*const GreenTokenData>,
91+
}
92+
8393
impl NodeCache {
8494
/// Hash used for nodes that haven't been cached because it has too many slots or
8595
/// one of its children wasn't cached.
@@ -90,11 +100,12 @@ impl NodeCache {
90100
/// Returns an entry that allows the caller to:
91101
/// * Retrieve the cached node if it is present in the cache
92102
/// * Insert a node if it isn't present in the cache
93-
pub(crate) fn node(
94-
&mut self,
103+
pub(crate) fn node<'a>(
104+
&'a mut self,
95105
kind: RawSyntaxKind,
96106
children: &[(u64, GreenElement)],
97-
) -> NodeCacheNodeEntryMut {
107+
live_set: Option<&'a mut LiveSet>,
108+
) -> NodeCacheNodeEntryMut<'a> {
98109
if children.len() > 3 {
99110
return NodeCacheNodeEntryMut::NoCache(Self::UNCACHED_NODE_HASH);
100111
}
@@ -136,20 +147,32 @@ impl NodeCache {
136147
});
137148

138149
match entry {
139-
RawEntryMut::Occupied(entry) => NodeCacheNodeEntryMut::Cached(CachedNodeEntry {
140-
hash,
141-
raw_entry: entry,
142-
}),
150+
RawEntryMut::Occupied(entry) => {
151+
if let Some(live_set) = live_set {
152+
live_set.nodes.insert(&*entry.key().node);
153+
}
154+
155+
NodeCacheNodeEntryMut::Cached(CachedNodeEntry {
156+
hash,
157+
raw_entry: entry,
158+
})
159+
}
143160
RawEntryMut::Vacant(entry) => NodeCacheNodeEntryMut::Vacant(VacantNodeEntry {
144161
raw_entry: entry,
145162
original_kind: kind,
146163
hash,
164+
live_set,
147165
}),
148166
}
149167
}
150168

151-
pub(crate) fn token(&mut self, kind: RawSyntaxKind, text: &str) -> (u64, GreenToken) {
152-
self.token_with_trivia(kind, text, &[], &[])
169+
pub(crate) fn token(
170+
&mut self,
171+
kind: RawSyntaxKind,
172+
text: &str,
173+
live_set: Option<&mut LiveSet>,
174+
) -> (u64, GreenToken) {
175+
self.token_with_trivia(kind, text, &[], &[], live_set)
153176
}
154177

155178
pub(crate) fn token_with_trivia(
@@ -158,6 +181,7 @@ impl NodeCache {
158181
text: &str,
159182
leading: &[TriviaPiece],
160183
trailing: &[TriviaPiece],
184+
live_set: Option<&mut LiveSet>,
161185
) -> (u64, GreenToken) {
162186
let hash = token_hash_of(kind, text);
163187

@@ -178,8 +202,29 @@ impl NodeCache {
178202
}
179203
};
180204

205+
if let Some(live_set) = live_set {
206+
live_set.tokens.insert(&*token);
207+
}
208+
181209
(hash, token)
182210
}
211+
212+
pub(crate) fn is_empty(&self) -> bool {
213+
self.nodes.is_empty() && self.tokens.is_empty() && self.trivia.is_empty()
214+
}
215+
216+
/// Removes nodes and tokens from the cache that are not present in the provided [LiveSet]
217+
pub(crate) fn evict_unreachable(&mut self, live_set: LiveSet) {
218+
self.nodes.drain_filter(|node, _| {
219+
let key: *const GreenNodeData = &*node.node;
220+
!live_set.nodes.contains(&key)
221+
});
222+
223+
self.tokens.drain_filter(|token, _| {
224+
let key: *const GreenTokenData = &*token.0;
225+
!live_set.tokens.contains(&key)
226+
});
227+
}
183228
}
184229

185230
pub(crate) enum NodeCacheNodeEntryMut<'a> {
@@ -199,6 +244,7 @@ pub(crate) struct VacantNodeEntry<'a> {
199244
hash: u64,
200245
original_kind: RawSyntaxKind,
201246
raw_entry: RawVacantEntryMut<'a, CachedNode, (), BuildHasherDefault<FxHasher>>,
247+
live_set: Option<&'a mut LiveSet>,
202248
}
203249

204250
/// Represents an entry of a cached node.
@@ -231,6 +277,10 @@ impl<'a> VacantNodeEntry<'a> {
231277
// unknown node. Never cache these nodes because cache lookups will never match.
232278
NodeCache::UNCACHED_NODE_HASH
233279
} else {
280+
if let Some(live_set) = self.live_set {
281+
live_set.nodes.insert(&*node);
282+
}
283+
234284
self.raw_entry.insert_with_hasher(
235285
self.hash,
236286
CachedNode {
@@ -316,6 +366,10 @@ impl TriviaCache {
316366

317367
h.finish()
318368
}
369+
370+
fn is_empty(&self) -> bool {
371+
self.cache.is_empty()
372+
}
319373
}
320374

321375
#[cfg(test)]

crates/rome_rowan/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub use rome_text_size::{TextLen, TextRange, TextSize};
3636

3737
pub use crate::{
3838
ast::*,
39-
green::RawSyntaxKind,
39+
green::{NodeCache, RawSyntaxKind},
4040
syntax::{
4141
chain_trivia_pieces, ChainTriviaPiecesIterator, Language, SendNode, SyntaxElement,
4242
SyntaxElementChildren, SyntaxKind, SyntaxList, SyntaxNode, SyntaxNodeChildren,

crates/rome_rowan/src/tree_builder.rs

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
use crate::green::NodeCacheNodeEntryMut;
21
use crate::{
32
cow_mut::CowMut,
4-
green::{GreenElement, NodeCache},
3+
green::{GreenElement, LiveSet, NodeCache, NodeCacheNodeEntryMut},
54
syntax::TriviaPiece,
65
GreenNode, Language, NodeOrToken, ParsedChildren, SyntaxFactory, SyntaxKind, SyntaxNode,
76
};
@@ -17,6 +16,7 @@ pub struct TreeBuilder<'cache, L: Language, S: SyntaxFactory<Kind = L::Kind>> {
1716
cache: CowMut<'cache, NodeCache>,
1817
parents: Vec<(L::Kind, usize)>,
1918
children: Vec<(u64, GreenElement)>,
19+
live_set: Option<LiveSet>,
2020
ph: PhantomData<S>,
2121
}
2222

@@ -26,6 +26,7 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> Default for TreeBuilder<'_,
2626
cache: CowMut::default(),
2727
parents: Vec::default(),
2828
children: Vec::default(),
29+
live_set: None,
2930
ph: PhantomData,
3031
}
3132
}
@@ -40,10 +41,16 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
4041
/// Reusing `NodeCache` between different [TreeBuilder]`s saves memory.
4142
/// It allows to structurally share underlying trees.
4243
pub fn with_cache(cache: &mut NodeCache) -> TreeBuilder<'_, L, S> {
44+
let has_cached_nodes = !cache.is_empty();
4345
TreeBuilder {
4446
cache: CowMut::Borrowed(cache),
4547
parents: Vec::new(),
4648
children: Vec::new(),
49+
// We don't need to track the set of live nodes and tokens if the
50+
// cache was initially empty, as it means all the entries present
51+
// in the cache at the end of the parsing session were inserted by
52+
// this instance of the TreeBuilder
53+
live_set: has_cached_nodes.then(LiveSet::default),
4754
ph: PhantomData,
4855
}
4956
}
@@ -67,7 +74,9 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
6774
/// Adds new token to the current branch.
6875
#[inline]
6976
pub fn token(&mut self, kind: L::Kind, text: &str) -> &mut Self {
70-
let (hash, token) = self.cache.token(kind.to_raw(), text);
77+
let (hash, token) = self
78+
.cache
79+
.token(kind.to_raw(), text, self.live_set.as_mut());
7180
self.children.push((hash, token.into()));
7281
self
7382
}
@@ -81,9 +90,13 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
8190
leading: &[TriviaPiece],
8291
trailing: &[TriviaPiece],
8392
) {
84-
let (hash, token) = self
85-
.cache
86-
.token_with_trivia(kind.to_raw(), text, leading, trailing);
93+
let (hash, token) = self.cache.token_with_trivia(
94+
kind.to_raw(),
95+
text,
96+
leading,
97+
trailing,
98+
self.live_set.as_mut(),
99+
);
87100
self.children.push((hash, token.into()));
88101
}
89102

@@ -103,7 +116,7 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
103116
let raw_kind = kind.to_raw();
104117

105118
let slots = &self.children[first_child..];
106-
let node_entry = self.cache.node(raw_kind, slots);
119+
let node_entry = self.cache.node(raw_kind, slots, self.live_set.as_mut());
107120

108121
let mut build_node = || {
109122
let children = ParsedChildren::new(&mut self.children, first_child);
@@ -184,13 +197,19 @@ impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
184197
/// are paired!
185198
#[inline]
186199
#[must_use]
187-
pub fn finish(self) -> SyntaxNode<L> {
188-
SyntaxNode::new_root(self.finish_green())
200+
pub fn finish(mut self) -> SyntaxNode<L> {
201+
let root = SyntaxNode::new_root(self.finish_green());
202+
203+
if let Some(live_set) = self.live_set.take() {
204+
self.cache.evict_unreachable(live_set);
205+
}
206+
207+
root
189208
}
190209

191210
// For tests
192211
#[must_use]
193-
pub(crate) fn finish_green(mut self) -> GreenNode {
212+
pub(crate) fn finish_green(&mut self) -> GreenNode {
194213
assert_eq!(self.children.len(), 1);
195214
match self.children.pop().unwrap().1 {
196215
NodeOrToken::Node(node) => node,

crates/rome_service/src/file_handlers/javascript.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use rome_js_syntax::{
3333
AnyJsRoot, JsLanguage, JsSyntaxNode, SourceType, TextRange, TextSize, TokenAtOffset,
3434
};
3535
use rome_parser::AnyParse;
36-
use rome_rowan::{AstNode, BatchMutationExt, Direction};
36+
use rome_rowan::{AstNode, BatchMutationExt, Direction, NodeCache};
3737
use std::borrow::Cow;
3838
use std::fmt::Debug;
3939
use tracing::debug;
@@ -116,7 +116,12 @@ impl ExtensionHandler for JsFileHandler {
116116
}
117117
}
118118

119-
fn parse(rome_path: &RomePath, language_hint: LanguageId, text: &str) -> AnyParse {
119+
fn parse(
120+
rome_path: &RomePath,
121+
language_hint: LanguageId,
122+
text: &str,
123+
cache: &mut NodeCache,
124+
) -> AnyParse {
120125
let file_id = rome_path.file_id();
121126

122127
let source_type =
@@ -127,7 +132,7 @@ fn parse(rome_path: &RomePath, language_hint: LanguageId, text: &str) -> AnyPars
127132
_ => SourceType::js_module(),
128133
});
129134

130-
let parse = rome_js_parser::parse(text, file_id, source_type);
135+
let parse = rome_js_parser::parse_with_cache(text, file_id, source_type, cache);
131136
AnyParse::from(parse)
132137
}
133138

0 commit comments

Comments
 (0)