Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

pondrs

Pipelines of Nodes & Datasets — a Rust pipeline execution library, heavily inspired by Kedro.

Example

Define your catalog and params as structs, with datasets backed by files or memory:

/// The datasets used by the example pipeline.
///
/// The serde derives allow the catalog to be described in a config file
/// (the example uses `catalog.yml`).
#[derive(Serialize, Deserialize)]
struct Catalog {
    /// Input of the `summarize` node; `catalog.yml` gives it a CSV path and separator.
    readings: PolarsCsvDataset,
    /// In-memory `f64` passed from the `summarize` node to the `report` node.
    summary: MemoryDataset<f64>,
    /// Output of the `report` node; `catalog.yml` gives it a JSON file path.
    report: JsonDataset,
}

/// The tunable parameters of the example pipeline.
///
/// The serde derives allow the values to be supplied from a config file
/// (the example uses `params.yml`).
#[derive(Serialize, Deserialize)]
struct Params {
    /// Cut-off the `report` node compares the mean against (`mean >= threshold`).
    threshold: Param<f64>,
}

Write a pipeline function that wires nodes together through shared datasets:

/// Builds the two-step example pipeline over the given catalog and params.
///
/// 1. `summarize` takes `cat.readings` as a `DataFrame` and stores the mean of its
///    `"value"` column in `cat.summary`.
/// 2. `report` takes that mean together with `params.threshold` and emits a JSON
///    object with the keys `"mean"` and `"passed"` (`mean >= threshold`) to `cat.report`.
///
/// # Panics
///
/// The `summarize` closure panics if any of its three `unwrap` calls fails
/// (column lookup, `f64` access, or computing the mean).
fn pipeline<'a>(cat: &'a Catalog, params: &'a Params) -> impl Steps<PondError> + 'a {
    // First step: reduce the readings to a single mean value.
    let summarize = Node {
        name: "summarize",
        func: |frame: DataFrame| {
            let column = frame.column("value").unwrap();
            let values = column.f64().unwrap();
            (values.mean().unwrap(),)
        },
        input: (&cat.readings,),
        output: (&cat.summary,),
    };

    // Second step: compare the mean against the configured threshold.
    let report = Node {
        name: "report",
        func: |avg: f64, limit: f64| {
            let passed = avg >= limit;
            (json!({ "mean": avg, "passed": passed }),)
        },
        input: (&cat.summary, &params.threshold),
        output: (&cat.report,),
    };

    // The steps are returned as a tuple, in execution order.
    (summarize, report)
}

Configure your catalog and params via YAML and run with the built-in CLI:

# catalog.yml
# Settings for the `readings` dataset (a PolarsCsvDataset in Catalog).
readings:
  path: data/readings.csv
  separator: ","
# `summary` is a MemoryDataset in Catalog; it is given an empty mapping here.
summary: {}
# Settings for the `report` dataset (a JsonDataset in Catalog).
report:
  path: data/report.json
# params.yml
# Value for `threshold` in Params; the CLI example overrides it with `--params threshold=0.8`.
threshold: 0.5
    // Build the app from the two YAML files located under `dir`.
    // `to_str().unwrap()` panics if either joined path is not valid Unicode.
    pondrs::app::App::from_yaml(
        dir.join("catalog.yml").to_str().unwrap(),
        dir.join("params.yml").to_str().unwrap(),
    )?
    // Pass in the process's command-line arguments (the example uses `run`, `check`, `viz`).
    .with_args(std::env::args_os())?
    // Hand the `pipeline` function to the app — presumably this executes the chosen subcommand.
    .dispatch(pipeline)
$ my_app run
$ my_app run --params threshold=0.8   # override params from CLI
$ my_app check                        # validate pipeline DAG
$ my_app viz                          # interactive pipeline visualization

Pipeline visualization

Open fullscreen