Skip to content

feat(embedded): Add multiple experimental manifest syntaxes #13241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -91,7 +91,6 @@ sha2 = "0.10.8"
shell-escape = "0.1.5"
supports-hyperlinks = "2.1.0"
snapbox = { version = "0.4.15", features = ["diff", "path"] }
syn = { version = "2.0.46", features = ["extra-traits", "full"] }
tar = { version = "0.4.40", default-features = false }
tempfile = "3.9.0"
thiserror = "1.0.56"
@@ -179,7 +178,6 @@ opener.workspace = true
os_info.workspace = true
pasetors.workspace = true
pathdiff.workspace = true
pulldown-cmark.workspace = true
rand.workspace = true
regex.workspace = true
rusqlite.workspace = true
@@ -192,7 +190,6 @@ serde_json = { workspace = true, features = ["raw_value"] }
sha1.workspace = true
shell-escape.workspace = true
supports-hyperlinks.workspace = true
syn.workspace = true
tar.workspace = true
tempfile.workspace = true
time.workspace = true
735 changes: 84 additions & 651 deletions src/cargo/util/toml/embedded.rs
Original file line number Diff line number Diff line change
@@ -18,10 +18,7 @@ pub(super) fn expand_manifest(
let source = split_source(content)?;
if let Some(frontmatter) = source.frontmatter {
match source.info {
Some("cargo") => {}
None => {
anyhow::bail!("frontmatter is missing an infostring; specify `cargo` for embedding a manifest");
}
Some("cargo") | None => {}
Some(other) => {
if let Some(remainder) = other.strip_prefix("cargo,") {
anyhow::bail!("cargo does not support frontmatter infostring attributes like `{remainder}` at this time")
@@ -66,17 +63,8 @@ pub(super) fn expand_manifest(
let manifest = toml::to_string_pretty(&manifest)?;
Ok(manifest)
} else {
// Legacy doc-comment support; here only for transitional purposes
let comment = extract_comment(content)?.unwrap_or_default();
let manifest = match extract_manifest(&comment)? {
Some(manifest) => Some(manifest),
None => {
tracing::trace!("failed to extract manifest");
None
}
}
.unwrap_or_default();
let manifest = expand_manifest_(&manifest, path, config)
let frontmatter = "";
let manifest = expand_manifest_(frontmatter, path, config)
.with_context(|| format!("failed to parse manifest at {}", path.display()))?;
let manifest = toml::to_string_pretty(&manifest)?;
Ok(manifest)
@@ -229,17 +217,29 @@ fn split_source(input: &str) -> CargoResult<Source<'_>> {
source.content = content;
}

// Experiment: let us try which char works better
let tick_char = source
.content
.chars()
.filter(|c| ['`', '#', '-'].contains(c))
.next()
.unwrap_or('`');

let tick_end = source
.content
.char_indices()
.find_map(|(i, c)| (c != '`').then_some(i))
.find_map(|(i, c)| (c != tick_char).then_some(i))
.unwrap_or(source.content.len());
let (fence_pattern, rest) = match tick_end {
0 => {
return Ok(source);
}
1 | 2 => {
anyhow::bail!("found {tick_end} backticks in rust frontmatter, expected at least 3")
if tick_char == '#' {
// Attribute
return Ok(source);
}
Comment on lines +238 to +241
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this explicitly check for #![] or #[]?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not get into the complexity of matching rust syntax. If we go this route, we can further evaluate what the exact behavior should be.

anyhow::bail!("found {tick_end} `{tick_char}` in rust frontmatter, expected at least 3")
}
_ => source.content.split_at(tick_end),
};
@@ -268,323 +268,6 @@ fn split_source(input: &str) -> CargoResult<Source<'_>> {
Ok(source)
}

/// Gathers the `doc` attributes that `syn` reports on the parsed file into one doc string.
///
/// Returns `Ok(None)` when the file carries no `doc` attributes. Fails when `input` cannot be
/// parsed by `syn` or when a `doc` attribute cannot be converted into a [`DocFragment`].
fn extract_comment(input: &str) -> CargoResult<Option<String>> {
    /// Whether `attr` is a `doc` attribute.
    fn is_doc(attr: &syn::Attribute) -> bool {
        attr.meta.path().is_ident("doc")
    }

    let file = syn::parse_file(input)?;

    // HACK: `syn` does not expose which comment syntax was used, so guess from the number of
    // `doc` attributes: more than one is treated as line comments, otherwise a block comment.
    let doc_attr_count = file.attrs.iter().filter(|attr| is_doc(attr)).count();
    let kind = if doc_attr_count > 1 {
        CommentKind::Line
    } else {
        CommentKind::Block
    };

    let mut fragments = file
        .attrs
        .iter()
        .filter(|attr| is_doc(attr))
        .map(|attr| DocFragment::new(attr, kind))
        .collect::<CargoResult<Vec<_>>>()?;
    if fragments.is_empty() {
        return Ok(None);
    }
    unindent_doc_fragments(&mut fragments);

    let mut combined = String::new();
    for fragment in &fragments {
        add_doc_fragment(&mut combined, fragment);
    }
    Ok(Some(combined))
}

/// The text of one `#[doc]` attribute together with the indentation to strip from it.
///
/// Built by `DocFragment::new` with `indent` set to `0`; `unindent_doc_fragments` later
/// assigns `indent`, and `add_doc_fragment` uses it to slice each non-blank line.
#[derive(Clone, Debug)]
struct DocFragment {
/// The attribute value, after `beautify_doc_string` has been applied
doc: String,
/// Number of leading bytes removed from every non-blank line of `doc` when it is emitted
indent: usize,
}

impl DocFragment {
    /// Builds a fragment from a `#[doc = "..."]` attribute.
    ///
    /// The string literal is passed through `beautify_doc_string` for the given `kind`; the
    /// indentation starts at zero. Fails when the attribute is not of the `name = value` form
    /// or when the value is not a string literal.
    fn new(attr: &syn::Attribute, kind: CommentKind) -> CargoResult<Self> {
        let value = match &attr.meta {
            syn::Meta::NameValue(name_value) => &name_value.value,
            _ => anyhow::bail!("unsupported attr meta for {:?}", attr.meta.path()),
        };
        let text = match value {
            syn::Expr::Lit(syn::ExprLit {
                lit: syn::Lit::Str(literal),
                ..
            }) => literal.value(),
            _ => anyhow::bail!("only string literals are supported"),
        };
        Ok(Self {
            doc: beautify_doc_string(text, kind),
            indent: 0,
        })
    }
}

/// Which doc-comment syntax a doc string is assumed to have come from.
#[derive(Clone, Copy, PartialEq, Debug)]
enum CommentKind {
    /// Line-style doc comments; no block-comment star handling is applied.
    Line,
    /// Block-style doc comments (`/**` or `/*!`), whose decorative stars get stripped.
    Block,
}

/// Tidies a doc string for presentation.
///
/// Single-line strings are returned untouched. For multi-line strings, a leading line made
/// only of `*` (or empty) and a trailing non-empty line made only of `*` are dropped, and a
/// common `[ \t]*` prefix in front of an aligned `*` column is removed from every line. For
/// block comments the leading `*` itself is removed as well when it stands alone or is
/// followed by a space or another `*`. If nothing was trimmed, `data` is returned as is.
///
/// See `rustc_ast/util/comments.rs`
fn beautify_doc_string(data: String, kind: CommentKind) -> String {
    /// True when `line` holds nothing but `*` characters (an empty line qualifies too).
    fn only_stars(line: &str) -> bool {
        line.chars().all(|c| c == '*')
    }

    /// Computes the `start..end` range of lines left after dropping all-star border lines,
    /// or `None` when no border line is present.
    fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
        let total = lines.len();
        // An all-star first line is decoration and gets skipped.
        let start = match lines.first() {
            Some(first) if only_stars(first) => 1,
            _ => 0,
        };
        // Likewise for the last line, unless it is empty or already consumed above.
        let end = match lines.last() {
            Some(last) if total > start && !last.is_empty() && only_stars(last) => total - 1,
            _ => total,
        };
        (start != 0 || end != total).then_some((start, end))
    }

    /// Finds the whitespace prefix that precedes a `*` sitting in the same column on every
    /// considered line, or `None` when the lines do not share such a column.
    fn get_horizontal_trim(lines: &[&str], kind: CommentKind) -> Option<String> {
        // For block comments, blank lines at either end are ignored, and so is the first line
        // unless it already begins with a star.
        let lines = match kind {
            CommentKind::Line => lines,
            CommentKind::Block => {
                let mut start = match lines.first() {
                    Some(first) if !first.trim_start().starts_with('*') => 1,
                    _ => 0,
                };
                let mut end = lines.len();
                while start < end && lines[start].trim().is_empty() {
                    start += 1;
                }
                while end > start && lines[end - 1].trim().is_empty() {
                    end -= 1;
                }
                &lines[start..end]
            }
        };

        // Column of the star, fixed by the first line that is scanned.
        let mut star_col: Option<usize> = None;
        for line in lines {
            for (col, c) in line.chars().enumerate() {
                let past_star = star_col.map_or(false, |star| col > star);
                if past_star || !matches!(c, '*' | ' ' | '\t') {
                    return None;
                }
                if c == '*' {
                    match star_col {
                        None => star_col = Some(col),
                        Some(star) if star != col => return None,
                        Some(_) => {}
                    }
                    break;
                }
            }
            // Every line must actually reach the star column.
            match star_col {
                Some(star) if star < line.len() => {}
                _ => return None,
            }
        }
        star_col.map(|star| lines[0][..star].to_owned())
    }

    if data.contains('\n') {
        let mut all_lines: Vec<&str> = data.lines().collect();
        let vertical = get_vertical_trim(&all_lines);
        // Restrict to the lines that survive the vertical trim.
        let kept: &mut [&str] = match vertical {
            Some((start, end)) => &mut all_lines[start..end],
            None => &mut all_lines[..],
        };
        let horizontal = get_horizontal_trim(kept, kind);
        if let Some(prefix) = horizontal.as_deref() {
            // remove a "[ \t]*\*" block from each line, if possible
            for line in kept.iter_mut() {
                let Some(rest) = line.strip_prefix(prefix) else {
                    continue;
                };
                let drop_star = kind == CommentKind::Block
                    && (rest == "*" || rest.starts_with("* ") || rest.starts_with("**"));
                *line = if drop_star { &rest[1..] } else { rest };
            }
        }
        if vertical.is_some() || horizontal.is_some() {
            return kept.join("\n");
        }
    }
    data
}

/// Computes the indentation shared by all doc fragments and records it on each non-empty one.
///
/// Doc comments conventionally put a space between the comment marker and the text, but
/// Markdown is whitespace-sensitive: text indented by four spaces is parsed as a code block.
/// Recording the smallest indentation found across all fragments lets the later rendering
/// step strip it, so that ordinary text and list items are not mistaken for code.
///
/// Whitespace-only lines are ignored when measuring; spaces and tabs each count as one unit
/// of indentation. Nothing happens when `docs` is empty.
///
/// See also `rustc_resolve/rustdoc.rs`
fn unindent_doc_fragments(docs: &mut [DocFragment]) {
    // HACK: `#[doc]` attributes and sugared doc comments cannot be told apart here, so the
    // per-kind adjustment rustdoc applies is not available and stays at zero.
    let add = 0;

    // Smallest count of leading spaces/tabs over every non-blank line of every fragment.
    // A fragment without any non-blank line contributes `usize::MAX`.
    let smallest_indent = docs
        .iter()
        .map(|fragment| {
            fragment
                .doc
                .lines()
                .filter(|line| !line.chars().all(char::is_whitespace))
                .map(|line| {
                    line.chars()
                        .take_while(|&c| matches!(c, ' ' | '\t'))
                        .count()
                })
                .min()
                .unwrap_or(usize::MAX)
        })
        .min();
    let Some(smallest_indent) = smallest_indent else {
        return;
    };

    let indent = if smallest_indent > 0 {
        smallest_indent - add
    } else {
        smallest_indent
    };
    for fragment in docs
        .iter_mut()
        .filter(|fragment| !fragment.doc.is_empty())
    {
        fragment.indent = indent;
    }
}

/// Appends one doc fragment to `out`, removing the fragment's recorded indent from each line.
///
/// An empty fragment contributes a single newline. Otherwise every line of the fragment is
/// written followed by a newline: lines containing non-whitespace text lose their first
/// `frag.indent` bytes, while whitespace-only lines are copied unchanged.
///
/// Panics if a non-blank line is shorter than `frag.indent`.
///
/// See also `rustc_resolve/rustdoc.rs`
fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
    if frag.doc.is_empty() {
        out.push('\n');
        return;
    }
    for line in frag.doc.lines() {
        let is_blank = line.chars().all(char::is_whitespace);
        let text = if is_blank {
            line
        } else {
            assert!(line.len() >= frag.indent);
            &line[frag.indent..]
        };
        out.push_str(text);
        out.push('\n');
    }
}

/// Pulls the contents of the `cargo` fenced code block out of a chunk of Markdown.
///
/// The info string is compared case-insensitively. Returns `Ok(None)` when no such block
/// exists and an error when more than one is present.
fn extract_manifest(comment: &str) -> CargoResult<Option<String>> {
    use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};

    // To match librustdoc/html/markdown.rs, opts.
    let exts = Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES;

    let mut manifest: Option<String> = None;
    // Set while the parser is between the start and end of a `cargo` block.
    let mut in_cargo_block = false;

    for event in Parser::new_ext(comment, exts) {
        match event {
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
                if info.to_lowercase() != "cargo" {
                    continue;
                }
                if manifest.is_some() {
                    anyhow::bail!("multiple `cargo` manifests present")
                }
                manifest = Some(String::new());
                in_cargo_block = true;
            }
            Event::Text(text) if in_cargo_block => {
                manifest.get_or_insert_with(String::new).push_str(&text);
            }
            Event::End(Tag::CodeBlock(_)) if in_cargo_block => {
                in_cargo_block = false;
            }
            _ => {}
        }
    }

    Ok(manifest)
}

#[cfg(test)]
mod test_expand {
use super::*;
@@ -627,10 +310,10 @@ strip = true

#[test]
fn test_dependencies() {
snapbox::assert_eq(
snapbox::assert_matches(
r#"[[bin]]
name = "test-"
path = "/home/me/test.rs"
path = [..]
[dependencies]
time = "0.1.25"
@@ -649,361 +332,111 @@ strip = true
[workspace]
"#,
si!(r#"
//! ```cargo
//! [dependencies]
//! time="0.1.25"
//! ```
fn main() {}
"#),
);
}
}

#[cfg(test)]
mod test_comment {
use super::*;

macro_rules! ec {
($s:expr) => {
extract_comment($s)
.unwrap_or_else(|err| panic!("{}", err))
.unwrap()
};
}

#[test]
fn test_no_comment() {
assert_eq!(
None,
extract_comment(
r#"
fn main () {
}
"#,
)
.unwrap()
);
}

#[test]
fn test_no_comment_she_bang() {
assert_eq!(
None,
extract_comment(
r#"#!/usr/bin/env cargo-eval
fn main () {
}
"#,
)
.unwrap()
);
}

#[test]
fn test_comment() {
snapbox::assert_eq(
r#"Here is a manifest:
```cargo
si!(r#"```cargo
[dependencies]
time = "*"
time="0.1.25"
```
"#,
ec!(r#"//! Here is a manifest:
//!
//! ```cargo
//! [dependencies]
//! time = "*"
//! ```
fn main() {}
"#),
);
}

#[test]
fn test_comment_shebang() {
snapbox::assert_eq(
r#"Here is a manifest:
fn test_no_infostring() {
snapbox::assert_matches(
r#"[[bin]]
name = "test-"
path = [..]
```cargo
[dependencies]
time = "*"
```
"#,
ec!(r#"#!/usr/bin/env cargo-eval
//! Here is a manifest:
//!
//! ```cargo
//! [dependencies]
//! time = "*"
//! ```
fn main() {}
"#),
);
}
time = "0.1.25"
#[test]
fn test_multiline_comment() {
snapbox::assert_eq(
r#"Here is a manifest:
[package]
autobenches = false
autobins = false
autoexamples = false
autotests = false
build = false
edition = "2021"
name = "test-"
```cargo
[dependencies]
time = "*"
```
"#,
ec!(r#"/*!
Here is a manifest:
[profile.release]
strip = true
```cargo
[workspace]
"#,
si!(r#"```
[dependencies]
time = "*"
time="0.1.25"
```
*/
fn main() {
}
fn main() {}
"#),
);
}

#[test]
fn test_multiline_comment_shebang() {
snapbox::assert_eq(
r#"Here is a manifest:
```cargo
[dependencies]
time = "*"
```
"#,
ec!(r#"#!/usr/bin/env cargo-eval
/*!
Here is a manifest:
fn test_dash() {
snapbox::assert_matches(
r#"[[bin]]
name = "test-"
path = [..]
```cargo
[dependencies]
time = "*"
```
*/
time = "0.1.25"
fn main() {
}
"#),
);
}
[package]
autobenches = false
autobins = false
autoexamples = false
autotests = false
build = false
edition = "2021"
name = "test-"
#[test]
fn test_multiline_block_comment() {
snapbox::assert_eq(
r#"Here is a manifest:
[profile.release]
strip = true
```cargo
[dependencies]
time = "*"
```
[workspace]
"#,
ec!(r#"/*!
* Here is a manifest:
*
* ```cargo
* [dependencies]
* time = "*"
* ```
*/
fn main() {}
"#),
);
}

#[test]
fn test_multiline_block_comment_shebang() {
snapbox::assert_eq(
r#"Here is a manifest:
```cargo
si!(r#"---
[dependencies]
time = "*"
```
"#,
ec!(r#"#!/usr/bin/env cargo-eval
/*!
* Here is a manifest:
*
* ```cargo
* [dependencies]
* time = "*"
* ```
*/
time="0.1.25"
---
fn main() {}
"#),
);
}

#[test]
fn test_adjacent_comments() {
snapbox::assert_eq(
r#"Here is a manifest:
fn test_hash() {
snapbox::assert_matches(
r#"[[bin]]
name = "test-"
path = [..]
```cargo
[dependencies]
time = "*"
```
"#,
ec!(r#"#!/usr/bin/env cargo-eval
// I am a normal comment
//! Here is a manifest:
//!
//! ```cargo
//! [dependencies]
//! time = "*"
//! ```
time = "0.1.25"
fn main () {
}
"#),
);
}
[package]
autobenches = false
autobins = false
autoexamples = false
autotests = false
build = false
edition = "2021"
name = "test-"
#[test]
fn test_doc_attrib() {
snapbox::assert_eq(
r#"Here is a manifest:
[profile.release]
strip = true
```cargo
[dependencies]
time = "*"
```
[workspace]
"#,
ec!(r###"#!/usr/bin/env cargo-eval
#![doc = r#"Here is a manifest:
```cargo
si!(r#"###
[dependencies]
time = "*"
```
"#]
fn main () {
}
"###),
);
}
}

#[cfg(test)]
mod test_manifest {
use super::*;

macro_rules! smm {
($c:expr) => {
extract_manifest($c)
};
}

#[test]
fn test_no_code_fence() {
assert_eq!(
smm!(
r#"There is no manifest in this comment.
"#
)
.unwrap(),
None
);
}

#[test]
fn test_no_cargo_code_fence() {
assert_eq!(
smm!(
r#"There is no manifest in this comment.
```
This is not a manifest.
```
```rust
println!("Nor is this.");
```
Or this.
"#
)
.unwrap(),
None
);
}

#[test]
fn test_cargo_code_fence() {
assert_eq!(
smm!(
r#"This is a manifest:
```cargo
dependencies = { time = "*" }
```
"#
)
.unwrap(),
Some(
r#"dependencies = { time = "*" }
"#
.into()
)
);
}

#[test]
fn test_mixed_code_fence() {
assert_eq!(
smm!(
r#"This is *not* a manifest:
```
He's lying, I'm *totally* a manifest!
```
This *is*:
```cargo
dependencies = { time = "*" }
```
"#
)
.unwrap(),
Some(
r#"dependencies = { time = "*" }
"#
.into()
)
time="0.1.25"
###
fn main() {}
"#),
);
}

#[test]
fn test_two_cargo_code_fence() {
assert!(smm!(
r#"This is a manifest:
```cargo
dependencies = { time = "*" }
```
So is this, but it doesn't count:
```cargo
dependencies = { explode = true }
```
"#
)
.is_err());
}
}