hoodini.utils.polars_adapters
Helpers to bridge pandas↔Polars at the edges of the codebase.
Goal: keep Polars as the internal dataframe representation. Use these adapters at integration boundaries that still emit pandas (external libs, legacy code) until migration is complete.
"""Helpers to bridge pandas↔Polars at the edges of the codebase.

Goal: keep Polars as the internal dataframe representation. Use these adapters at
integration boundaries that still emit pandas (external libs, legacy code) until
migration is complete.
"""

from __future__ import annotations

from collections.abc import Iterable, Mapping
from typing import Any

import polars as pl

from hoodini.models.schemas import TableSchema, ensure_schema


def to_polars(
    df: pl.DataFrame | pl.LazyFrame | Any, *, schema: TableSchema | None = None
) -> pl.DataFrame:
    """Return ``df`` as an eager Polars DataFrame, optionally enforcing a schema.

    Args:
        df: A ``pl.DataFrame`` (used as-is) or a ``pl.LazyFrame`` (materialized
            with ``collect()``). Any other type is rejected.
        schema: When given, the frame is passed through ``ensure_schema`` and
            that function's result is returned.

    Returns:
        The eager DataFrame, after ``ensure_schema`` if ``schema`` was supplied.

    Raises:
        TypeError: If ``df`` is neither a Polars DataFrame nor a LazyFrame.
    """
    if isinstance(df, pl.LazyFrame):
        out = df.collect()
    elif isinstance(df, pl.DataFrame):
        out = df
    else:
        raise TypeError(f"Unsupported dataframe type: {type(df)}; expected Polars")

    # Compare against None explicitly so a schema object that happens to be
    # falsy is still enforced instead of being silently skipped.
    if schema is not None:
        out = ensure_schema(out, schema)
    return out


def to_pandas(df: pl.DataFrame | pl.LazyFrame | Any):
    """Deprecated: pandas removed from dependencies.

    Always raises ``ImportError``, whatever ``df`` is.
    """
    raise ImportError("pandas has been removed; please use Polars instead")


def ensure_required(df: pl.DataFrame, cols: Iterable[str]) -> None:
    """Check that every name in ``cols`` is a column of ``df``.

    Args:
        df: Frame whose ``columns`` are inspected.
        cols: Column names that must be present.

    Raises:
        ValueError: If one or more names are absent; the message lists them
            in the order they were given.
    """
    # Snapshot the column names once so each membership test is a set lookup
    # rather than a fresh ``df.columns`` access plus a linear scan.
    existing = set(df.columns)
    missing = [c for c in cols if c not in existing]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")


def rename_if_present(df: pl.DataFrame, mapping: Mapping[str, str]) -> pl.DataFrame:
    """Rename columns that exist; ignore missing keys.

    Args:
        df: Frame whose columns may be renamed.
        mapping: Old name -> new name. Entries whose old name is not a column
            of ``df`` are dropped before renaming.

    Returns:
        ``df.rename(...)`` restricted to the applicable entries, or ``df``
        itself (same object) when no entry applies.
    """
    # Snapshot the column names once; see ensure_required for the rationale.
    existing = set(df.columns)
    present = {old: new for old, new in mapping.items() if old in existing}
    return df.rename(present) if present else df
def
to_polars( df: polars.dataframe.frame.DataFrame | polars.lazyframe.frame.LazyFrame | typing.Any, *, schema: hoodini.models.schemas.TableSchema | None = None) -> polars.dataframe.frame.DataFrame:
def to_polars(
    df: pl.DataFrame | pl.LazyFrame | Any, *, schema: TableSchema | None = None
) -> pl.DataFrame:
    """Return ``df`` as an eager Polars DataFrame, optionally enforcing a schema.

    Args:
        df: A ``pl.DataFrame`` (used as-is) or a ``pl.LazyFrame`` (materialized
            with ``collect()``). Any other type is rejected.
        schema: When given, the frame is passed through ``ensure_schema`` and
            that function's result is returned.

    Returns:
        The eager DataFrame, after ``ensure_schema`` if ``schema`` was supplied.

    Raises:
        TypeError: If ``df`` is neither a Polars DataFrame nor a LazyFrame.
    """
    if isinstance(df, pl.LazyFrame):
        out = df.collect()
    elif isinstance(df, pl.DataFrame):
        out = df
    else:
        raise TypeError(f"Unsupported dataframe type: {type(df)}; expected Polars")

    # Compare against None explicitly so a schema object that happens to be
    # falsy is still enforced instead of being silently skipped.
    if schema is not None:
        out = ensure_schema(out, schema)
    return out
Convert incoming data to a Polars DataFrame and optionally enforce schema.
def
to_pandas( df: polars.dataframe.frame.DataFrame | polars.lazyframe.frame.LazyFrame | typing.Any):
def to_pandas(df: pl.DataFrame | pl.LazyFrame | Any):
    """Deprecated stub kept after pandas was dropped from the dependencies.

    Always raises ``ImportError``, whatever ``df`` is; callers should stay
    on Polars.
    """
    message = "pandas has been removed; please use Polars instead"
    raise ImportError(message)
Deprecated: pandas removed from dependencies.
def
ensure_required(df: polars.dataframe.frame.DataFrame, cols: Iterable[str]) -> None:
def
rename_if_present( df: polars.dataframe.frame.DataFrame, mapping: Mapping[str, str]) -> polars.dataframe.frame.DataFrame:
def rename_if_present(df: pl.DataFrame, mapping: Mapping[str, str]) -> pl.DataFrame:
    """Rename columns that exist; ignore missing keys.

    Args:
        df: Frame whose columns may be renamed.
        mapping: Old name -> new name. Entries whose old name is not a column
            of ``df`` are dropped before renaming.

    Returns:
        ``df.rename(...)`` restricted to the applicable entries, or ``df``
        itself (same object) when no entry applies.
    """
    # Snapshot the column names once so each membership test is a set lookup
    # rather than a fresh ``df.columns`` access plus a linear scan per key.
    existing = set(df.columns)
    present = {old: new for old, new in mapping.items() if old in existing}
    return df.rename(present) if present else df
Rename columns that exist; ignore missing keys.