mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Implement CSV file header rows support. (#2619)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
parent
e8e797c18b
commit
1f983ced90
@ -2,7 +2,7 @@ use ecow::{eco_format, EcoString};
|
|||||||
|
|
||||||
use crate::diag::{bail, At, SourceResult};
|
use crate::diag::{bail, At, SourceResult};
|
||||||
use crate::engine::Engine;
|
use crate::engine::Engine;
|
||||||
use crate::foundations::{cast, func, scope, Array, IntoValue, Value};
|
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
|
||||||
use crate::loading::Readable;
|
use crate::loading::Readable;
|
||||||
use crate::syntax::Spanned;
|
use crate::syntax::Spanned;
|
||||||
use crate::World;
|
use crate::World;
|
||||||
@ -35,11 +35,21 @@ pub fn csv(
|
|||||||
#[named]
|
#[named]
|
||||||
#[default]
|
#[default]
|
||||||
delimiter: Delimiter,
|
delimiter: Delimiter,
|
||||||
|
/// How to represent the file's rows.
|
||||||
|
///
|
||||||
|
/// - If set to `array`, each row is represented as a plain array of
|
||||||
|
/// strings.
|
||||||
|
/// - If set to `dictionary`, each row is represented as a dictionary
|
||||||
|
/// mapping from header keys to strings. This option only makes sense when
|
||||||
|
/// a header row is present in the CSV file.
|
||||||
|
#[named]
|
||||||
|
#[default(RowType::Array)]
|
||||||
|
row_type: RowType,
|
||||||
) -> SourceResult<Array> {
|
) -> SourceResult<Array> {
|
||||||
let Spanned { v: path, span } = path;
|
let Spanned { v: path, span } = path;
|
||||||
let id = span.resolve_path(&path).at(span)?;
|
let id = span.resolve_path(&path).at(span)?;
|
||||||
let data = engine.world.file(id).at(span)?;
|
let data = engine.world.file(id).at(span)?;
|
||||||
self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter)
|
self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[scope]
|
#[scope]
|
||||||
@ -54,22 +64,59 @@ impl csv {
|
|||||||
#[named]
|
#[named]
|
||||||
#[default]
|
#[default]
|
||||||
delimiter: Delimiter,
|
delimiter: Delimiter,
|
||||||
|
/// How to represent the file's rows.
|
||||||
|
///
|
||||||
|
/// - If set to `array`, each row is represented as a plain array of
|
||||||
|
/// strings.
|
||||||
|
/// - If set to `dictionary`, each row is represented as a dictionary
|
||||||
|
/// mapping from header keys to strings. This option only makes sense
|
||||||
|
/// when a header row is present in the CSV file.
|
||||||
|
#[named]
|
||||||
|
#[default(RowType::Array)]
|
||||||
|
row_type: RowType,
|
||||||
) -> SourceResult<Array> {
|
) -> SourceResult<Array> {
|
||||||
let Spanned { v: data, span } = data;
|
let Spanned { v: data, span } = data;
|
||||||
let mut builder = ::csv::ReaderBuilder::new();
|
let has_headers = row_type == RowType::Dict;
|
||||||
builder.has_headers(false);
|
|
||||||
builder.delimiter(delimiter.0 as u8);
|
|
||||||
let mut reader = builder.from_reader(data.as_slice());
|
|
||||||
let mut array = Array::new();
|
|
||||||
|
|
||||||
|
let mut builder = ::csv::ReaderBuilder::new();
|
||||||
|
builder.has_headers(has_headers);
|
||||||
|
builder.delimiter(delimiter.0 as u8);
|
||||||
|
|
||||||
|
// Counting lines from 1 by default.
|
||||||
|
let mut line_offset: usize = 1;
|
||||||
|
let mut reader = builder.from_reader(data.as_slice());
|
||||||
|
let mut headers: Option<::csv::StringRecord> = None;
|
||||||
|
|
||||||
|
if has_headers {
|
||||||
|
// Counting lines from 2 because we have a header.
|
||||||
|
line_offset += 1;
|
||||||
|
headers = Some(
|
||||||
|
reader
|
||||||
|
.headers()
|
||||||
|
.map_err(|err| format_csv_error(err, 1))
|
||||||
|
.at(span)?
|
||||||
|
.clone(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut array = Array::new();
|
||||||
for (line, result) in reader.records().enumerate() {
|
for (line, result) in reader.records().enumerate() {
|
||||||
// Original solution use line from error, but that is incorrect with
|
// Original solution was to use line from error, but that is
|
||||||
// `has_headers` set to `false`. See issue:
|
// incorrect with `has_headers` set to `false`. See issue:
|
||||||
// https://github.com/BurntSushi/rust-csv/issues/184
|
// https://github.com/BurntSushi/rust-csv/issues/184
|
||||||
let line = line + 1; // Counting lines from 1
|
let line = line + line_offset;
|
||||||
let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
|
let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
|
||||||
let sub = row.into_iter().map(|field| field.into_value()).collect();
|
let item = if let Some(headers) = &headers {
|
||||||
array.push(Value::Array(sub))
|
let mut dict = Dict::new();
|
||||||
|
for (field, value) in headers.iter().zip(&row) {
|
||||||
|
dict.insert(field.into(), value.into_value());
|
||||||
|
}
|
||||||
|
dict.into_value()
|
||||||
|
} else {
|
||||||
|
let sub = row.into_iter().map(|field| field.into_value()).collect();
|
||||||
|
Value::Array(sub)
|
||||||
|
};
|
||||||
|
array.push(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(array)
|
Ok(array)
|
||||||
@ -103,6 +150,30 @@ cast! {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The type of parsed rows.
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
|
||||||
|
pub enum RowType {
|
||||||
|
Array,
|
||||||
|
Dict,
|
||||||
|
}
|
||||||
|
|
||||||
|
cast! {
|
||||||
|
RowType,
|
||||||
|
self => match self {
|
||||||
|
Self::Array => Type::of::<Array>(),
|
||||||
|
Self::Dict => Type::of::<Dict>(),
|
||||||
|
}.into_value(),
|
||||||
|
ty: Type => {
|
||||||
|
if ty == Type::of::<Array>() {
|
||||||
|
Self::Array
|
||||||
|
} else if ty == Type::of::<Dict>() {
|
||||||
|
Self::Dict
|
||||||
|
} else {
|
||||||
|
bail!("expected `array` or `dictionary`");
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
/// Format the user-facing CSV error message.
|
/// Format the user-facing CSV error message.
|
||||||
fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
|
fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
|
||||||
match err.kind() {
|
match err.kind() {
|
||||||
|
@ -22,6 +22,14 @@
|
|||||||
#let cells = data.at(0).map(strong) + data.slice(1).flatten()
|
#let cells = data.at(0).map(strong) + data.slice(1).flatten()
|
||||||
#table(columns: data.at(0).len(), ..cells)
|
#table(columns: data.at(0).len(), ..cells)
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test reading CSV data with dictionary rows enabled.
|
||||||
|
#let data = csv("/files/zoo.csv", row-type: dictionary)
|
||||||
|
#test(data.len(), 3)
|
||||||
|
#test(data.at(0).Name, "Debby")
|
||||||
|
#test(data.at(2).Weight, "150kg")
|
||||||
|
#test(data.at(1).Species, "Tiger")
|
||||||
|
|
||||||
---
|
---
|
||||||
// Error: 6-16 file not found (searched at typ/compute/nope.csv)
|
// Error: 6-16 file not found (searched at typ/compute/nope.csv)
|
||||||
#csv("nope.csv")
|
#csv("nope.csv")
|
||||||
@ -30,6 +38,11 @@
|
|||||||
// Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
|
// Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
|
||||||
#csv("/files/bad.csv")
|
#csv("/files/bad.csv")
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test error numbering with dictionary rows.
|
||||||
|
// Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
|
||||||
|
#csv("/files/bad.csv", row-type: dictionary)
|
||||||
|
|
||||||
---
|
---
|
||||||
// Test reading JSON data.
|
// Test reading JSON data.
|
||||||
#let data = json("/files/zoo.json")
|
#let data = json("/files/zoo.json")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user