Add a `row-type` parameter to `csv` and `csv.decode`.

With `row-type: array` (the default) every CSV row is returned as an array of
strings, exactly as before. With `row-type: dictionary` the first line is read
as a header row and every following row is returned as a dictionary keyed by
the header fields. Error line numbers account for the consumed header line.

diff --git a/crates/typst/src/loading/csv.rs b/crates/typst/src/loading/csv.rs
index 0d0095609..101b3812a 100644
--- a/crates/typst/src/loading/csv.rs
+++ b/crates/typst/src/loading/csv.rs
@@ -2,7 +2,7 @@ use ecow::{eco_format, EcoString};
 
 use crate::diag::{bail, At, SourceResult};
 use crate::engine::Engine;
-use crate::foundations::{cast, func, scope, Array, IntoValue, Value};
+use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
 use crate::loading::Readable;
 use crate::syntax::Spanned;
 use crate::World;
@@ -35,11 +35,21 @@ pub fn csv(
     #[named]
     #[default]
     delimiter: Delimiter,
+    /// How to represent the file's rows.
+    ///
+    /// - If set to `array`, each row is represented as a plain array of
+    ///   strings.
+    /// - If set to `dictionary`, each row is represented as a dictionary
+    ///   mapping from header keys to strings. This option only makes sense when
+    ///   a header row is present in the CSV file.
+    #[named]
+    #[default(RowType::Array)]
+    row_type: RowType,
 ) -> SourceResult<Array> {
     let Spanned { v: path, span } = path;
     let id = span.resolve_path(&path).at(span)?;
     let data = engine.world.file(id).at(span)?;
-    self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter)
+    self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type)
 }
 
 #[scope]
@@ -54,22 +64,59 @@ impl csv {
         #[named]
         #[default]
         delimiter: Delimiter,
+        /// How to represent the file's rows.
+        ///
+        /// - If set to `array`, each row is represented as a plain array of
+        ///   strings.
+        /// - If set to `dictionary`, each row is represented as a dictionary
+        ///   mapping from header keys to strings. This option only makes sense
+        ///   when a header row is present in the CSV file.
+        #[named]
+        #[default(RowType::Array)]
+        row_type: RowType,
     ) -> SourceResult<Array> {
         let Spanned { v: data, span } = data;
-        let mut builder = ::csv::ReaderBuilder::new();
-        builder.has_headers(false);
-        builder.delimiter(delimiter.0 as u8);
-        let mut reader = builder.from_reader(data.as_slice());
-        let mut array = Array::new();
+        let has_headers = row_type == RowType::Dict;
+        let mut builder = ::csv::ReaderBuilder::new();
+        builder.has_headers(has_headers);
+        builder.delimiter(delimiter.0 as u8);
+
+        // Counting lines from 1 by default.
+        let mut line_offset: usize = 1;
+        let mut reader = builder.from_reader(data.as_slice());
+        let mut headers: Option<::csv::StringRecord> = None;
+
+        if has_headers {
+            // Counting lines from 2 because we have a header.
+            line_offset += 1;
+            headers = Some(
+                reader
+                    .headers()
+                    .map_err(|err| format_csv_error(err, 1))
+                    .at(span)?
+                    .clone(),
+            );
+        }
+
+        let mut array = Array::new();
 
         for (line, result) in reader.records().enumerate() {
-            // Original solution use line from error, but that is incorrect with
-            // `has_headers` set to `false`. See issue:
+            // Original solution was to use line from error, but that is
+            // incorrect with `has_headers` set to `false`. See issue:
             // https://github.com/BurntSushi/rust-csv/issues/184
-            let line = line + 1; // Counting lines from 1
+            let line = line + line_offset;
             let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
-            let sub = row.into_iter().map(|field| field.into_value()).collect();
-            array.push(Value::Array(sub))
+            let item = if let Some(headers) = &headers {
+                let mut dict = Dict::new();
+                for (field, value) in headers.iter().zip(&row) {
+                    dict.insert(field.into(), value.into_value());
+                }
+                dict.into_value()
+            } else {
+                let sub = row.into_iter().map(|field| field.into_value()).collect();
+                Value::Array(sub)
+            };
+            array.push(item);
         }
 
         Ok(array)
@@ -103,6 +150,30 @@ cast! {
     },
 }
 
+/// The type of parsed rows.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum RowType {
+    Array,
+    Dict,
+}
+
+cast! {
+    RowType,
+    self => match self {
+        Self::Array => Type::of::<Array>(),
+        Self::Dict => Type::of::<Dict>(),
+    }.into_value(),
+    ty: Type => {
+        if ty == Type::of::<Array>() {
+            Self::Array
+        } else if ty == Type::of::<Dict>() {
+            Self::Dict
+        } else {
+            bail!("expected `array` or `dictionary`");
+        }
+    },
+}
+
 /// Format the user-facing CSV error message.
 fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
     match err.kind() {
diff --git a/tests/typ/compute/data.typ b/tests/typ/compute/data.typ
index c0a6ce50a..ae964b3d2 100644
--- a/tests/typ/compute/data.typ
+++ b/tests/typ/compute/data.typ
@@ -22,6 +22,14 @@
 #let cells = data.at(0).map(strong) + data.slice(1).flatten()
 #table(columns: data.at(0).len(), ..cells)
 
+---
+// Test reading CSV data with dictionary rows enabled.
+#let data = csv("/files/zoo.csv", row-type: dictionary)
+#test(data.len(), 3)
+#test(data.at(0).Name, "Debby")
+#test(data.at(2).Weight, "150kg")
+#test(data.at(1).Species, "Tiger")
+
 ---
 // Error: 6-16 file not found (searched at typ/compute/nope.csv)
 #csv("nope.csv")
@@ -30,6 +38,11 @@
 // Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
 #csv("/files/bad.csv")
 
+---
+// Test error numbering with dictionary rows.
+// Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
+#csv("/files/bad.csv", row-type: dictionary)
+
 ---
 // Test reading JSON data.
 #let data = json("/files/zoo.json")