Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Identity Pipeline

Demonstrates the Ident node: write CSV as plain text, read it back as a Polars DataFrame via Ident, and produce a Plotly bar chart.

Usage

cargo run --example ident_app -- \
    --catalog-path examples/ident_data/catalog.yml \
    --params-path examples/ident_data/params.yml run

Types

// ---------------------------------------------------------------------------
// Catalog
// ---------------------------------------------------------------------------

/// Catalog of the datasets wired together by the identity pipeline.
///
/// Derives `Serialize`/`Deserialize`; the usage example passes a
/// `catalog.yml` via `--catalog-path`, which is presumably deserialized
/// into this struct (confirm against the application entry point).
#[derive(Serialize, Deserialize)]
struct IdentCatalog {
    /// Output of the `generate_csv` node and input of the `text_to_csv`
    /// `Ident` step.
    csv_text: TextDataset,
    /// Output of the `text_to_csv` `Ident` step and input of the
    /// `build_chart` node.
    csv_data: PolarsCsvDataset,
    /// Output of the `build_chart` node.
    chart: PlotlyDataset,
}

Node functions

// ---------------------------------------------------------------------------
// Node functions
// ---------------------------------------------------------------------------

/// Produces the demo CSV document as plain text.
///
/// The result is a one-element tuple holding a header row (`fruit,count`)
/// followed by seven data rows. Rows are separated by `\n` and there is no
/// trailing newline.
fn generate_csv() -> (String,) {
    // One entry per CSV line; joined below with a single newline between rows.
    const ROWS: [&str; 8] = [
        "fruit,count",
        "Apples,35",
        "Bananas,22",
        "Cherries,48",
        "Dates,15",
        "Elderberries,31",
        "Figs,9",
        "Grapes,42",
    ];
    (ROWS.join("\n"),)
}

/// Builds a bar chart from the `fruit` and `count` columns of `df`.
///
/// The `fruit` values become the bar labels and the `count` values the bar
/// heights. The trace is named "Fruit Count", the plot is titled
/// "Fruit Inventory" and the y axis is titled "Count".
///
/// # Panics
///
/// Panics if either column is missing, if `fruit` cannot be viewed as a
/// string column, or if `count` cannot be viewed as an `i64` column.
fn build_chart(df: DataFrame) -> (Plot,) {
    // Bar labels.
    // NOTE(review): `into_no_null_iter` assumes the column has no nulls — confirm.
    let fruit_column = df.column("fruit").unwrap();
    let labels: Vec<String> = fruit_column
        .str()
        .unwrap()
        .into_no_null_iter()
        .map(String::from)
        .collect();

    // Bar heights (same no-null assumption as above).
    let count_column = df.column("count").unwrap();
    let values: Vec<i64> = count_column.i64().unwrap().into_no_null_iter().collect();

    let trace = Bar::new(labels, values).name("Fruit Count");
    let y_axis = plotly::layout::Axis::new().title("Count");
    let layout = Layout::new().title("Fruit Inventory").y_axis(y_axis);

    let mut chart = Plot::new();
    chart.add_trace(trace);
    chart.set_layout(layout);
    (chart,)
}

Pipeline definition

// ---------------------------------------------------------------------------
// Pipeline function
// ---------------------------------------------------------------------------

/// Assembles the three-step identity pipeline over the datasets in `cat`.
///
/// Steps, in order:
/// 1. `generate_csv` — runs [`generate_csv`] with no inputs and writes to
///    `cat.csv_text`.
/// 2. `text_to_csv` — an `Ident` step from `cat.csv_text` to `cat.csv_data`.
/// 3. `build_chart` — runs [`build_chart`] on `cat.csv_data` and writes to
///    `cat.chart`.
///
/// `_params` is unused; this pipeline takes no parameters.
fn ident_pipeline<'a>(
    cat: &'a IdentCatalog,
    _params: &'a (),
) -> impl Steps<PondError> + 'a {
    let write_text = Node {
        name: "generate_csv",
        func: generate_csv,
        input: (),
        output: (&cat.csv_text,),
    };

    let reinterpret = Ident {
        name: "text_to_csv",
        input: &cat.csv_text,
        output: &cat.csv_data,
    };

    let render_chart = Node {
        name: "build_chart",
        func: build_chart,
        input: (&cat.csv_data,),
        output: (&cat.chart,),
    };

    (write_text, reinterpret, render_chart)
}

Pipeline visualization

Open fullscreen