Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 15 additions & 32 deletions crates/catalog/rest/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use std::sync::Arc;

use async_trait::async_trait;
use iceberg::io::{FileIO, FileIOBuilder, StorageFactory};
use iceberg::spec::TableMetadata;
use iceberg::table::Table;
use iceberg::{
Catalog, CatalogBuilder, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit,
Expand All @@ -33,7 +34,7 @@ use itertools::Itertools;
use reqwest::header::{
HeaderMap, HeaderName, HeaderValue, {self},
};
use reqwest::{Client, Method, StatusCode, Url};
use reqwest::{Client, Method, StatusCode};
use tokio::sync::OnceCell;
use typed_builder::TypedBuilder;

Expand Down Expand Up @@ -406,29 +407,14 @@ impl RestCatalog {

async fn load_file_io(
&self,
metadata_location: Option<&str>,
metadata: &TableMetadata,
extra_config: Option<HashMap<String, String>>,
) -> Result<FileIO> {
let mut props = self.context().await?.config.props.clone();
if let Some(config) = extra_config {
props.extend(config);
}

// If the warehouse is a logical identifier instead of a URL we don't want
// to raise an exception
let warehouse_path = match self.context().await?.config.warehouse.as_deref() {
Some(url) if Url::parse(url).is_ok() => Some(url),
Some(_) => None,
None => None,
};

if metadata_location.or(warehouse_path).is_none() {
return Err(Error::new(
ErrorKind::Unexpected,
"Unable to load file io, neither warehouse nor metadata location is set!",
));
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need these checks anymore since now we rely on a configured StorageFactory

// Require a StorageFactory to be provided
let factory = self
.storage_factory
Expand All @@ -438,7 +424,8 @@ impl RestCatalog {
ErrorKind::Unexpected,
"StorageFactory must be provided for RestCatalog. Use `with_storage_factory` to configure it.",
)
})?;
})?
.with_metadata(metadata)?;

let file_io = FileIOBuilder::new(factory).with_props(props).build();

Expand Down Expand Up @@ -743,20 +730,20 @@ impl Catalog for RestCatalog {
}
};

let metadata_location = response.metadata_location.as_ref().ok_or(Error::new(
ErrorKind::DataInvalid,
"Metadata location missing in `create_table` response!",
))?;
if response.metadata_location.is_none() {
return Err(Error::new(
ErrorKind::DataInvalid,
"Metadata location missing in `create_table` response!",
));
}

let config = response
.config
.into_iter()
.chain(self.user_config.props.clone())
.collect();

let file_io = self
.load_file_io(Some(metadata_location), Some(config))
.await?;
let file_io = self.load_file_io(&response.metadata, Some(config)).await?;

let table_builder = Table::builder()
.identifier(table_ident.clone())
Expand Down Expand Up @@ -810,9 +797,7 @@ impl Catalog for RestCatalog {
.chain(self.user_config.props.clone())
.collect();

let file_io = self
.load_file_io(response.metadata_location.as_deref(), Some(config))
.await?;
let file_io = self.load_file_io(&response.metadata, Some(config)).await?;

let table_builder = Table::builder()
.identifier(table_ident.clone())
Expand Down Expand Up @@ -960,7 +945,7 @@ impl Catalog for RestCatalog {
"Metadata location missing in `register_table` response!",
))?;

let file_io = self.load_file_io(Some(metadata_location), None).await?;
let file_io = self.load_file_io(&response.metadata, None).await?;

Table::builder()
.identifier(table_ident.clone())
Expand Down Expand Up @@ -1030,9 +1015,7 @@ impl Catalog for RestCatalog {
}
};

let file_io = self
.load_file_io(Some(&response.metadata_location), None)
.await?;
let file_io = self.load_file_io(&response.metadata, None).await?;

Table::builder()
.identifier(commit.identifier().clone())
Expand Down
7 changes: 7 additions & 0 deletions crates/iceberg/src/io/storage/local_fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,13 @@ pub struct LocalFsStorageFactory;

#[typetag::serde]
impl StorageFactory for LocalFsStorageFactory {
fn with_metadata(
&self,
_metadata: &crate::spec::TableMetadata,
) -> Result<Arc<dyn StorageFactory>> {
Ok(Arc::new(self.clone()))
}

fn build(&self, _config: &StorageConfig) -> Result<Arc<dyn Storage>> {
Ok(Arc::new(LocalFsStorage::new()))
}
Expand Down
7 changes: 7 additions & 0 deletions crates/iceberg/src/io/storage/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,13 @@ pub struct MemoryStorageFactory;

#[typetag::serde]
impl StorageFactory for MemoryStorageFactory {
fn with_metadata(
&self,
_metadata: &crate::spec::TableMetadata,
) -> Result<Arc<dyn StorageFactory>> {
Ok(Arc::new(self.clone()))
}

fn build(&self, _config: &StorageConfig) -> Result<Arc<dyn Storage>> {
Ok(Arc::new(MemoryStorage::new()))
}
Expand Down
14 changes: 14 additions & 0 deletions crates/iceberg/src/io/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub use memory::{MemoryStorage, MemoryStorageFactory};

use super::{FileMetadata, FileRead, FileWrite, InputFile, OutputFile};
use crate::Result;
use crate::spec::TableMetadata;

/// Trait for storage operations in Iceberg.
///
Expand Down Expand Up @@ -128,6 +129,19 @@ pub trait Storage: Debug + Send + Sync {
/// ```
#[typetag::serde(tag = "type")]
pub trait StorageFactory: Debug + Send + Sync {
/// Create a new factory instance enriched with table metadata.
///
/// This allows storage factories to incorporate table-level metadata
/// (e.g., table properties) into the storage initialization.
///
/// Implementations that don't need table metadata should return
/// a clone of themselves: `Ok(Arc::new(self.clone()))`.
///
/// # Arguments
///
/// * `metadata` - The table metadata to incorporate
fn with_metadata(&self, metadata: &TableMetadata) -> Result<Arc<dyn StorageFactory>>;

/// Build a new Storage instance from the given configuration.
///
/// # Arguments
Expand Down
7 changes: 7 additions & 0 deletions crates/storage/opendal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ pub enum OpenDalStorageFactory {

#[typetag::serde(name = "OpenDalStorageFactory")]
impl StorageFactory for OpenDalStorageFactory {
fn with_metadata(
&self,
_metadata: &iceberg::spec::TableMetadata,
) -> Result<Arc<dyn StorageFactory>> {
Ok(Arc::new(self.clone()))
}

#[allow(unused_variables)]
fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>> {
match self {
Expand Down
7 changes: 7 additions & 0 deletions crates/storage/opendal/src/resolving.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,13 @@ impl OpenDalResolvingStorageFactory {

#[typetag::serde]
impl StorageFactory for OpenDalResolvingStorageFactory {
fn with_metadata(
&self,
_metadata: &iceberg::spec::TableMetadata,
) -> Result<Arc<dyn StorageFactory>> {
Ok(Arc::new(self.clone()))
}

fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>> {
Ok(Arc::new(OpenDalResolvingStorage {
props: config.props().clone(),
Expand Down
Loading