-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathbuild.rs
441 lines (403 loc) · 15 KB
/
build.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
#[cfg(feature = "static-grammar-libs")]
use anyhow::bail;
#[cfg(feature = "static-grammar-libs")]
use thiserror::Error;
#[cfg(feature = "static-grammar-libs")]
use cargo_emit::{rerun_if_changed, rerun_if_env_changed};
#[cfg(feature = "static-grammar-libs")]
use rayon::prelude::*;
#[cfg(feature = "static-grammar-libs")]
use std::{
env,
fmt::Display,
fs,
path::{Path, PathBuf},
vec,
};
use anyhow::Result;
use std::fmt::Write;
/// Compilation information as it pertains to a tree-sitter grammar
///
/// This contains information about a parser that is required at build time. The file names in
/// `c_sources` and `cpp_sources` are bare names; `preprocess_compile_info` resolves them against
/// `<path>/src` before anything is compiled.
#[cfg(feature = "static-grammar-libs")]
#[derive(Debug, Default)]
struct GrammarCompileInfo<'a> {
/// The language's display name
///
/// This is also used as the prefix of the compiled library names and to build the
/// `tree_sitter_{display_name}` symbol that the generated code declares.
display_name: &'a str,
/// The location of the grammar's source relative to `build.rs`
path: PathBuf,
/// The sources to compile with a C compiler
c_sources: Vec<&'a str>,
/// The sources to compile with a C++ compiler
///
/// The files supplied here will be compiled into a library named
/// "{display_name}-cxx-diffsitter" to avoid clashing with other symbols.
cpp_sources: Vec<&'a str>,
/// Additional include paths to pass to the compiler.
///
/// When this is `None`, `<path>/src` is used as the only include directory. Some repos may
/// need a different include path, which can be supplied here.
include_paths: Option<Vec<PathBuf>>,
}
/// The compilation parameters that are passed into the `compile_grammar` function
///
/// This is a convenience type that was created so we can store parameters in a vector and use
/// a parallel iterator to compile all of the grammars at once over a threadpool.
#[cfg(feature = "static-grammar-libs")]
struct CompileParams {
/// The include directories handed to the C and C++ compilers
pub include_dirs: Vec<PathBuf>,
/// The paths of the sources that are built with the C compiler
pub c_sources: Vec<PathBuf>,
/// The paths of the sources that are built with the C++ compiler
pub cpp_sources: Vec<PathBuf>,
/// The grammar's name, used as the prefix of the compiled library names
pub display_name: String,
}
/// An error that can arise when sanity checking compilation parameters
#[cfg(feature = "static-grammar-libs")]
#[derive(Debug, Error)]
enum CompileParamError {
/// One of the grammar's include directories does not exist; carries the grammar's name
#[error("Subdirectory for grammar {0} was not found")]
SubdirectoryNotFound(String),
/// One or more of the grammar's C/C++ source files do not exist
#[error("Source files {source_files:?} not found for {grammar}")]
SourceFilesNotFound {
/// The name of the grammar that had an error
grammar: String,
/// The missing source files
source_files: Vec<String>,
},
}
/// Environment variables that the build system relies on
///
/// Each of these is reported to Cargo by `extra_cargo_directives`, so if any of them change,
/// Cargo will rerun this build script and rebuild the project.
#[cfg(feature = "static-grammar-libs")]
const BUILD_ENV_VARS: &[&str] = &["CC", "CXX", "LD_LIBRARY_PATH", "PATH"];
/// Generate the source code for the map between language identifiers and the functions that
/// initialize each language's parser.
///
/// Every entry of `languages` becomes one `"<name>" => tree_sitter_<name>,` line inside a
/// `phf_map!` invocation that defines the `LANGUAGES` static. The returned string is Rust source
/// text; it is not compiled or validated here.
#[cfg(feature = "static-grammar-libs")]
fn codegen_language_map<T: ToString + Display>(languages: &[T]) -> String {
    // One map entry per language, each terminated by a newline.
    let mut body = String::new();
    for lang in languages {
        // Writing into a `String` cannot fail, so the unwrap never fires.
        writeln!(body, "\"{lang}\" => tree_sitter_{lang},").unwrap();
    }
    format!(
        "\nstatic LANGUAGES: phf::Map<&'static str, unsafe extern \"C\" fn() -> Language> = phf_map! {{\n {body}\n }};\n"
    )
}
/// Compile a language's grammar
///
/// The sources are built with the `cc` crate and statically linked into the Rust binary. The
/// libraries are named "{`output_name`}-[cc|cxx]-diffsitter" so they do not clash with any
/// tree-sitter libraries that are already installed.
///
/// * `includes` - include directories passed to both the C and the C++ build
/// * `c_sources` - files built with the C compiler; the C build is skipped when this is empty
/// * `cpp_sources` - files built with the C++ compiler; the C++ build is skipped when this is empty
/// * `output_name` - prefix of the generated library names
///
/// # Errors
///
/// Returns the `cc::Error` of the first build that fails.
#[cfg(feature = "static-grammar-libs")]
fn compile_grammar(
    includes: &[PathBuf],
    c_sources: &[PathBuf],
    cpp_sources: &[PathBuf],
    output_name: &str,
) -> Result<(), cc::Error> {
    // NOTE: the C sources have to be compiled first because the scanner depends on the parser.
    // Right now the only C libraries are parsers, so we build them before the C++ files, which
    // are only scanners. This resolves a linker error we were seeing on Linux.
    if !c_sources.is_empty() {
        let mut c_build = cc::Build::new();
        c_build
            .includes(includes)
            .files(c_sources)
            .flag_if_supported("-std=c11")
            .warnings(false)
            .extra_warnings(false);
        c_build.try_compile(&format!("{output_name}-cc-diffsitter"))?;
    }

    if !cpp_sources.is_empty() {
        let mut cxx_build = cc::Build::new();
        cxx_build
            .cpp(true)
            .includes(includes)
            .files(cpp_sources)
            .flag_if_supported("-std=c++17")
            .warnings(false)
            .extra_warnings(false);
        cxx_build.try_compile(&format!("{}-cxx-diffsitter", &output_name))?;
    }

    Ok(())
}
/// Print the remaining cargo-emit directives that are not tied to a specific grammar.
///
/// Currently this registers every variable in `BUILD_ENV_VARS` with `rerun_if_env_changed!`.
#[cfg(feature = "static-grammar-libs")]
fn extra_cargo_directives() {
    for env_var in BUILD_ENV_VARS.iter().copied() {
        rerun_if_env_changed!(env_var);
    }
}
/// Preprocess grammar compilation info so the build script can find all of the source files.
///
/// This will augment the C and C++ source files so that they have the full relative path from
/// the repository root, which prepends the grammar's path and `src/` to each file name. The
/// include directories are taken from `include_paths` when it is set and default to the
/// grammar's `src/` directory otherwise.
///
/// For example, a `GrammarCompileInfo` instance for Rust:
///
/// ```rust
/// GrammarCompileInfo {
///     display_name: "rust",
///     path: PathBuf::from("grammars/tree-sitter-rust"),
///     c_sources: vec!["parser.c", "scanner.c"],
///     ..GrammarCompileInfo::default()
/// };
/// ```
///
/// will get turned to:
///
/// ```rust
/// CompileParams {
///     include_dirs: vec![PathBuf::from("grammars/tree-sitter-rust/src")],
///     c_sources: vec![
///         PathBuf::from("grammars/tree-sitter-rust/src/parser.c"),
///         PathBuf::from("grammars/tree-sitter-rust/src/scanner.c"),
///     ],
///     cpp_sources: vec![],
///     display_name: "rust".to_string(),
/// };
/// ```
#[cfg(feature = "static-grammar-libs")]
fn preprocess_compile_info(grammar: &GrammarCompileInfo) -> CompileParams {
    // The directory that holds the grammar's source files
    let dir = grammar.path.join("src");

    // Explicitly configured include paths win; otherwise the source directory is the only
    // include directory. A single clone is enough to get an owned copy out of the borrow.
    let include_dirs = grammar
        .include_paths
        .clone()
        .unwrap_or_else(|| vec![dir.clone()]);

    // Prepend {grammar-repo}/src to each file name
    let resolve = |filenames: &[&str]| -> Vec<PathBuf> {
        filenames
            .iter()
            .map(|&filename| dir.join(filename))
            .collect()
    };

    CompileParams {
        include_dirs,
        c_sources: resolve(&grammar.c_sources[..]),
        cpp_sources: resolve(&grammar.cpp_sources[..]),
        display_name: grammar.display_name.into(),
    }
}
/// Sanity check the contents of a compilation info unit.
///
/// Checking the paths up front gives clearer errors than the more obscure ones the C/C++
/// toolchains produce when files are missing.
///
/// # Errors
///
/// * `CompileParamError::SubdirectoryNotFound` if any include directory does not exist
/// * `CompileParamError::SourceFilesNotFound` listing every C or C++ source that does not exist
#[cfg(feature = "static-grammar-libs")]
fn verify_compile_params(compile_params: &CompileParams) -> Result<(), CompileParamError> {
    let grammar = &compile_params.display_name;

    // Stop at the first include directory that is missing
    let include_dir_missing = compile_params
        .include_dirs
        .iter()
        .any(|include_dir| !include_dir.exists());
    if include_dir_missing {
        return Err(CompileParamError::SubdirectoryNotFound(
            grammar.to_string(),
        ));
    }

    // Collect every source file that *doesn't* exist so they can all be reported at once
    let missing_sources: Vec<String> = compile_params
        .c_sources
        .iter()
        .chain(&compile_params.cpp_sources)
        .filter(|file| !file.exists())
        .map(|file| file.to_string_lossy().to_string())
        .collect();

    if missing_sources.is_empty() {
        Ok(())
    } else {
        Err(CompileParamError::SourceFilesNotFound {
            grammar: grammar.to_string(),
            source_files: missing_sources,
        })
    }
}
/// Grammar compilation information for diffsitter.
///
/// This defines all of the grammars that are used by the build script. If you want to add new
/// grammars, add a row to the table below. The table itself is constant data; the
/// `GrammarCompileInfo` values are built from it at runtime because the `PathBuf` constructors
/// can't be evaluated at compile time.
#[cfg(feature = "static-grammar-libs")]
fn grammars() -> Vec<GrammarCompileInfo<'static>> {
    // Columns: display name, grammar location relative to `build.rs`, C sources, C++ sources
    const GRAMMAR_TABLE: &[(&str, &str, &[&str], &[&str])] = &[
        ("rust", "grammars/tree-sitter-rust", &["parser.c", "scanner.c"], &[]),
        ("cpp", "grammars/tree-sitter-cpp", &["parser.c", "scanner.c"], &[]),
        ("python", "grammars/tree-sitter-python", &["parser.c", "scanner.c"], &[]),
        ("bash", "grammars/tree-sitter-bash", &["parser.c", "scanner.c"], &[]),
        (
            "ocaml",
            "grammars/tree-sitter-ocaml/grammars/ocaml",
            &["parser.c", "scanner.c"],
            &[],
        ),
        ("go", "grammars/tree-sitter-go", &["parser.c"], &[]),
        ("ruby", "grammars/tree-sitter-ruby", &["parser.c"], &["scanner.cc"]),
        ("java", "grammars/tree-sitter-java", &["parser.c"], &[]),
        ("c_sharp", "grammars/tree-sitter-c-sharp", &["parser.c", "scanner.c"], &[]),
        ("css", "grammars/tree-sitter-css", &["parser.c", "scanner.c"], &[]),
        ("php", "grammars/tree-sitter-php/php", &["parser.c", "scanner.c"], &[]),
        ("json", "grammars/tree-sitter-json", &["parser.c"], &[]),
        ("hcl", "grammars/tree-sitter-hcl", &["parser.c"], &["scanner.cc"]),
        (
            "typescript",
            "grammars/tree-sitter-typescript/typescript",
            &["parser.c", "scanner.c"],
            &[],
        ),
        (
            "tsx",
            "grammars/tree-sitter-typescript/tsx",
            &["parser.c", "scanner.c"],
            &[],
        ),
        ("c", "grammars/tree-sitter-c", &["parser.c"], &[]),
        (
            "markdown",
            "grammars/tree-sitter-markdown/tree-sitter-markdown",
            &["parser.c", "scanner.c"],
            &[],
        ),
        // Add new grammars here...
    ];

    GRAMMAR_TABLE
        .iter()
        .map(
            |&(display_name, path, c_sources, cpp_sources)| GrammarCompileInfo {
                display_name,
                path: PathBuf::from(path),
                c_sources: c_sources.to_vec(),
                cpp_sources: cpp_sources.to_vec(),
                ..Default::default()
            },
        )
        .collect()
}
/// Compile the submodules as static grammars for the binary.
///
/// The steps are:
///
/// 1. resolve the source paths of every grammar and verify that they exist,
/// 2. compile all of the grammars in parallel,
/// 3. emit a `rerun_if_changed!` directive for every grammar source file, and
/// 4. write `generated_grammar.rs` (the `extern "C"` declarations plus the language map) into
///    the directory named by the `OUT_DIR` environment variable.
///
/// # Errors
///
/// Returns an error if a grammar's directories or source files are missing, if any grammar
/// fails to compile, if a source path is not valid UTF-8, if `OUT_DIR` is not set, or if the
/// generated file cannot be written.
#[cfg(feature = "static-grammar-libs")]
fn compile_static_grammars() -> Result<()> {
    let grammars = grammars();

    // The string that holds the generated code we get from the tree sitter grammars
    let mut codegen = String::from(
        r"
use tree_sitter::Language;
use phf::phf_map;
",
    );

    // A vector of language strings that are used later for codegen, so we can dynamically create
    // the unsafe functions that load the grammar for each language
    let mut languages = Vec::with_capacity(grammars.len());

    // We create a vector of parameters so we can use Rayon's parallel iterators to compile
    // grammars in parallel
    let compile_params: Vec<CompileParams> = grammars.iter().map(preprocess_compile_info).collect();

    // Verify each preprocessed compile param entry -- the first failure short circuits the build
    // script
    compile_params.iter().try_for_each(verify_compile_params)?;

    // Any of the compilation steps failing will short circuit the entire `collect` function and
    // error out
    compile_params
        .par_iter()
        .map(|p| {
            compile_grammar(
                &p.include_dirs,
                &p.c_sources[..],
                &p.cpp_sources[..],
                &p.display_name,
            )
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Run the follow up tasks for the compiled sources
    for params in &compile_params {
        let language = &params.display_name;

        // If compilation succeeded with either case, link the language. If it failed, we'll never
        // get to this step.
        writeln!(
            codegen,
            "extern \"C\" {{ pub fn tree_sitter_{language}() -> Language; }}"
        )?;
        languages.push(language.as_str());

        // We recompile the libraries if any grammar sources change, since Cargo will cache based
        // on the Rust modules and isn't aware of the linked C libraries.
        for source in params.c_sources.iter().chain(params.cpp_sources.iter()) {
            if let Some(grammar_path) = source.to_str() {
                rerun_if_changed!(grammar_path);
            } else {
                bail!("Path to grammar for {} is not a valid string", language);
            }
        }
    }

    extra_cargo_directives();
    codegen += &codegen_language_map(&languages[..]);

    // Write the generated code to a file in the resulting build directory. Cargo sets `OUT_DIR`
    // for build scripts; report a readable error instead of panicking if it is absent.
    let codegen_out_dir = env::var_os("OUT_DIR")
        .ok_or_else(|| anyhow::anyhow!("the OUT_DIR environment variable is not set"))?;
    let codegen_path = Path::new(&codegen_out_dir).join("generated_grammar.rs");
    fs::write(codegen_path, codegen)?;
    Ok(())
}
/// Build script entry point.
///
/// Compiles the static grammars when the `static-grammar-libs` feature is enabled and runs
/// `shadow_rs` when the `better-build-info` feature is enabled. With neither feature enabled
/// this does nothing.
fn main() -> anyhow::Result<()> {
    #[cfg(feature = "static-grammar-libs")]
    compile_static_grammars()?;

    #[cfg(feature = "better-build-info")]
    shadow_rs::new().map_err(|err| anyhow::anyhow!(err.to_string()))?;

    // TODO(afnan): add generated shell completion scripts
    Ok(())
}