Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion crates/connect/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,27 @@ impl DataFrame {
DataFrameNaFunctions::new(self)
}

// !TODO observe
/// Attach named observation metrics to this [DataFrame].
///
/// The supplied aggregate expressions are recorded in the query plan and
/// evaluated while the query runs; their results can later be read back
/// from the execution metrics under the given observation name.
///
/// # Arguments
/// * `name` - Name of the observation
/// * `exprs` - Metric expressions (e.g., `count(lit(1))`, `avg(col("value"))`)
pub fn observe<I>(self, name: &str, exprs: I) -> DataFrame
where
    I: IntoIterator,
    I::Item: Into<Column>,
{
    // Delegate plan construction to the builder; the session is carried over unchanged.
    DataFrame {
        spark_session: self.spark_session,
        plan: self.plan.observe(name, exprs),
    }
}

/// Returns a new [DataFrame] by skipping the first n rows
pub fn offset(self, num: i32) -> DataFrame {
Expand Down
17 changes: 17 additions & 0 deletions crates/connect/src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,23 @@ impl LogicalPlanBuilder {
LogicalPlanBuilder::from(hint_rel)
}

/// Wrap the current relation in a `CollectMetrics` node so that the given
/// metric expressions are observed under `name` during execution.
pub fn observe<I>(self, name: &str, exprs: I) -> LogicalPlanBuilder
where
    I: IntoIterator,
    I::Item: Into<Column>,
{
    // Lower each incoming item to a Column, then to its protobuf expression.
    let metric_exprs: Vec<spark::Expression> = exprs
        .into_iter()
        .map(|item| item.into().expression)
        .collect();

    let rel_type = RelType::CollectMetrics(Box::new(spark::CollectMetrics {
        input: self.relation_input(),
        name: name.to_owned(),
        metrics: metric_exprs,
    }));

    LogicalPlanBuilder::from(rel_type)
}

pub fn join<'a, T, I>(
self,
right: LogicalPlanBuilder,
Expand Down