Support for multiple bib files in a bibliography (#296)

This commit is contained in:
Lars Wrenger 2023-03-31 18:41:17 +02:00 committed by GitHub
parent 4161bad54f
commit 631ba40e57
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 21 deletions

View File

@ -0,0 +1,10 @@
@article{keshav2007read,
title = {How to read a paper},
author = {Keshav, Srinivasan},
journal = {ACM SIGCOMM Computer Communication Review},
volume = {37},
number = {3},
pages = {83--84},
year = {2007},
publisher = {ACM New York, NY, USA}
}

View File

@ -48,13 +48,17 @@ pub struct BibliographyElem {
/// Path to a Hayagriva `.yml` or BibLaTeX `.bib` file.
#[required]
#[parse(
let Spanned { v: path, span } =
args.expect::<Spanned<EcoString>>("path to bibliography file")?;
let path: EcoString = vm.locate(&path).at(span)?.to_string_lossy().into();
let _ = load(vm.world(), &path).at(span)?;
path
let Spanned { v: mut paths, span } =
args.expect::<Spanned<BibPaths>>("path to bibliography file")?;
for path in &mut paths.0 {
// resolve paths
*path = vm.locate(&path).at(span)?.to_string_lossy().into();
}
// check that parsing works
let _ = load(vm.world(), &paths).at(span)?;
paths
)]
pub path: EcoString,
pub path: BibPaths,
/// The title of the bibliography.
///
@ -70,6 +74,22 @@ pub struct BibliographyElem {
pub style: BibliographyStyle,
}
/// A list of bib file paths.
///
/// Newtype around a `Vec<EcoString>`. The cast declared for this type accepts
/// either a single string (wrapped as a one-element list) or an array of
/// strings, so a bibliography can be given one path or several.
#[derive(Debug, Default, Clone, Hash)]
pub struct BibPaths(Vec<EcoString>);
// Declares how a `BibPaths` is built from a value:
// - a single string becomes a one-element list;
// - an array is cast element by element via `Value::cast`, and the whole
//   conversion fails with the first element error (collected as `StrResult`).
cast_from_value! {
BibPaths,
v: EcoString => Self(vec![v]),
v: Array => Self(v.into_iter().map(Value::cast).collect::<StrResult<_>>()?),
}
// Declares the reverse conversion: the inner vector of paths is handed to
// `.into()`. NOTE(review): presumably this yields an array value even when
// only one path was given — confirm against the `cast_to_value!` macro.
cast_to_value! {
v: BibPaths => v.0.into()
}
impl BibliographyElem {
/// Find the document's bibliography.
pub fn find(introspector: Tracked<Introspector>) -> StrResult<Self> {
@ -534,22 +554,53 @@ fn create(
/// Load bibliography entries from a path.
#[comemo::memoize]
fn load(world: Tracked<dyn World>, path: &str) -> StrResult<EcoVec<hayagriva::Entry>> {
let path = Path::new(path);
let buffer = world.file(path)?;
let src = std::str::from_utf8(&buffer).map_err(|_| "file is not valid utf-8")?;
// Reads every file listed in `paths` through `world`, parses each one with
// `parse_bib`, and returns all entries in the order the files were given.
//
// Returns an error message if a file cannot be read, is not valid UTF-8,
// fails to parse, or if the same key occurs more than once across the
// combined entries. Each duplicated key is named exactly once in the message.
fn load(
    world: Tracked<dyn World>,
    paths: &BibPaths,
) -> StrResult<EcoVec<hayagriva::Entry>> {
    let mut result = EcoVec::new();

    // We might have multiple bib/yaml files
    for path in &paths.0 {
        let buffer = world.file(Path::new(path.as_str()))?;
        let src = std::str::from_utf8(&buffer).map_err(|_| "file is not valid utf-8")?;
        let entries = parse_bib(path, src)?;
        result.extend(entries);
    }

    // Biblatex only checks for duplicate keys within files
    // -> We have to do this between files again
    let mut keys = result.iter().map(|r| r.key()).collect::<Vec<_>>();
    keys.sort_unstable();

    // Waiting for `slice_partition_dedup` #54279
    let mut duplicates = Vec::new();
    for pair in keys.windows(2) {
        if pair[0] == pair[1] {
            duplicates.push(pair[0]);
        }
    }

    // A key that occurs three or more times yields several equal adjacent
    // pairs above. `keys` is sorted, so those repeats are adjacent in
    // `duplicates` too and a plain `dedup` leaves one mention per key.
    duplicates.dedup();

    if duplicates.is_empty() {
        Ok(result)
    } else {
        Err(eco_format!("duplicate bibliography keys: {}", duplicates.join(", ")))
    }
}
/// Parse a bibliography file (bib/yml)
fn parse_bib(path_str: &str, src: &str) -> StrResult<Vec<hayagriva::Entry>> {
let path = Path::new(path_str);
let ext = path.extension().and_then(OsStr::to_str).unwrap_or_default();
let entries = match ext.to_lowercase().as_str() {
"yml" => hayagriva::io::from_yaml_str(src).map_err(format_hayagriva_error)?,
match ext.to_lowercase().as_str() {
"yml" => hayagriva::io::from_yaml_str(src).map_err(format_hayagriva_error),
"bib" => hayagriva::io::from_biblatex_str(src).map_err(|err| {
err.into_iter()
.next()
.map(|error| format_biblatex_error(src, error))
.unwrap_or_else(|| "failed to parse biblatex file".into())
})?,
_ => return Err("unknown bibliography format".into()),
};
Ok(entries.into_iter().collect())
.map(|error| format_biblatex_error(path_str, src, error))
.unwrap_or_else(|| eco_format!("failed to parse {path_str}"))
}),
_ => Err("unknown bibliography format".into()),
}
}
/// Format a Hayagriva loading error.
@ -558,13 +609,13 @@ fn format_hayagriva_error(error: YamlBibliographyError) -> EcoString {
}
/// Format a BibLaTeX loading error.
fn format_biblatex_error(src: &str, error: BibLaTeXError) -> EcoString {
fn format_biblatex_error(path: &str, src: &str, error: BibLaTeXError) -> EcoString {
let (span, msg) = match error {
BibLaTeXError::Parse(error) => (error.span, error.kind.to_string()),
BibLaTeXError::Type(error) => (error.span, error.kind.to_string()),
};
let line = src.get(..span.start).unwrap_or_default().lines().count();
eco_format!("failed to parse biblatex file: {msg} in line {line}")
eco_format!("parsing failed at {path}:{line}: {msg}")
}
/// Hayagriva only supports strings, but we have a content supplement. To deal

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

After

Width:  |  Height:  |  Size: 163 KiB

View File

@ -1,7 +1,7 @@
// Test citations and bibliographies.
---
// Error: 15-25 failed to parse biblatex file: wrong number of digits in line 5
// Error: 15-25 parsing failed at ../assets/files/bad.bib:5: wrong number of digits
#bibliography("/bad.bib")
---
@ -27,3 +27,13 @@ As described by @netwok],
the net-work is a creature of its own.
This is close to piratery! @arrgh
And quark! @quark
---
// Error: 15-43 duplicate bibliography keys: arrgh, distress, glacier-melt, issue201, mcintosh_anxiety, netwok, psychology25, quark, restful, sharing, tolkien54
#bibliography(("/works.bib", "/works.bib"))
---
#set page(width: 200pt)
= Multiple Bibs
Now we have multiple bibliographies containing #cite("glacier-melt", "keshav2007read")
#bibliography(("/works.bib", "/works_too.bib"))