hoodini.utils.polars_adapters

Helpers to bridge pandas↔Polars at the edges of the codebase.

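Goal: keep Polars as the internal dataframe representation. Use these adapters at integration boundaries that still emit pandas (external libs, legacy code) until migration is complete. Note that pandas has since been removed from the dependencies: `to_polars` accepts only Polars DataFrames and LazyFrames (anything else raises `TypeError`), and `to_pandas` always raises `ImportError`.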

 1"""Helpers to bridge pandas↔Polars at the edges of the codebase.
 2
 3Goal: keep Polars as the internal dataframe representation. Use these adapters at
 4integration boundaries that still emit pandas (external libs, legacy code) until
 5migration is complete.
 6"""
 7
 8from __future__ import annotations
 9
10from collections.abc import Iterable, Mapping
11from typing import Any
12
13import polars as pl
14
15from hoodini.models.schemas import TableSchema, ensure_schema
16
17
def to_polars(
    df: pl.DataFrame | pl.LazyFrame | Any, *, schema: TableSchema | None = None
) -> pl.DataFrame:
    """Return *df* as an eager Polars DataFrame, applying *schema* when given.

    Args:
        df: A ``pl.DataFrame`` (used as-is) or a ``pl.LazyFrame`` (collected).
        schema: Optional table schema. When truthy, the frame is passed
            through ``ensure_schema`` and that result is returned.

    Returns:
        The eager frame, or whatever ``ensure_schema`` returns for it.

    Raises:
        TypeError: If ``df`` is neither a Polars DataFrame nor a LazyFrame.
    """
    # Reject anything that is not a Polars frame before doing any work.
    if not isinstance(df, (pl.DataFrame, pl.LazyFrame)):
        raise TypeError(f"Unsupported dataframe type: {type(df)}; expected Polars")

    # Lazy frames are materialized; eager frames pass through untouched.
    frame = df.collect() if isinstance(df, pl.LazyFrame) else df

    if not schema:
        return frame
    return ensure_schema(frame, schema)
32
33
def to_pandas(df: pl.DataFrame | pl.LazyFrame | Any):
    """Deprecated stub: always raises because pandas is no longer a dependency.

    Args:
        df: Ignored; kept so existing call sites fail with a clear message.

    Raises:
        ImportError: Unconditionally, directing callers to Polars.
    """
    message = "pandas has been removed; please use Polars instead"
    raise ImportError(message)
37
38
def ensure_required(df: pl.DataFrame, cols: Iterable[str]) -> None:
    """Raise if any of the required columns is absent from *df*.

    Args:
        df: Frame whose ``columns`` are checked.
        cols: Required column names. A single ``str`` is treated as one
            column name rather than being iterated character by character.

    Raises:
        ValueError: If one or more names are missing; the message lists them
            in the order they were requested.
    """
    if isinstance(cols, str):
        # A bare string is itself iterable; without this guard "id" would be
        # checked as the two columns "i" and "d".
        cols = (cols,)

    # Build the lookup once so each membership test is O(1).
    available = set(df.columns)
    missing = [name for name in cols if name not in available]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
43
44
def rename_if_present(df: pl.DataFrame, mapping: Mapping[str, str]) -> pl.DataFrame:
    """Apply only those renames whose source column exists in *df*.

    Args:
        df: Frame to rename columns on.
        mapping: ``old -> new`` column names; keys not in ``df.columns`` are
            skipped.

    Returns:
        The result of ``df.rename`` with the applicable subset, or ``df``
        itself when no key matches.
    """
    applicable = {}
    for source, target in mapping.items():
        if source in df.columns:
            applicable[source] = target

    if not applicable:
        return df
    return df.rename(applicable)
def to_polars( df: polars.dataframe.frame.DataFrame | polars.lazyframe.frame.LazyFrame | typing.Any, *, schema: hoodini.models.schemas.TableSchema | None = None) -> polars.dataframe.frame.DataFrame:
def to_polars(
    df: pl.DataFrame | pl.LazyFrame | Any, *, schema: TableSchema | None = None
) -> pl.DataFrame:
    """Return *df* as an eager Polars DataFrame, applying *schema* when given.

    Args:
        df: A ``pl.DataFrame`` (used as-is) or a ``pl.LazyFrame`` (collected).
        schema: Optional table schema. When truthy, the frame is passed
            through ``ensure_schema`` and that result is returned.

    Returns:
        The eager frame, or whatever ``ensure_schema`` returns for it.

    Raises:
        TypeError: If ``df`` is neither a Polars DataFrame nor a LazyFrame.
    """
    # Reject anything that is not a Polars frame before doing any work.
    if not isinstance(df, (pl.DataFrame, pl.LazyFrame)):
        raise TypeError(f"Unsupported dataframe type: {type(df)}; expected Polars")

    # Lazy frames are materialized; eager frames pass through untouched.
    frame = df.collect() if isinstance(df, pl.LazyFrame) else df

    if not schema:
        return frame
    return ensure_schema(frame, schema)

Convert incoming data to a Polars DataFrame and optionally enforce schema.

def to_pandas( df: polars.dataframe.frame.DataFrame | polars.lazyframe.frame.LazyFrame | typing.Any):
def to_pandas(df: pl.DataFrame | pl.LazyFrame | Any):
    """Deprecated stub: always raises because pandas is no longer a dependency.

    Args:
        df: Ignored; kept so existing call sites fail with a clear message.

    Raises:
        ImportError: Unconditionally, directing callers to Polars.
    """
    message = "pandas has been removed; please use Polars instead"
    raise ImportError(message)

Deprecated: pandas removed from dependencies.

def ensure_required(df: polars.dataframe.frame.DataFrame, cols: Iterable[str]) -> None:
def ensure_required(df: pl.DataFrame, cols: Iterable[str]) -> None:
    """Raise if any of the required columns is absent from *df*.

    Args:
        df: Frame whose ``columns`` are checked.
        cols: Required column names. A single ``str`` is treated as one
            column name rather than being iterated character by character.

    Raises:
        ValueError: If one or more names are missing; the message lists them
            in the order they were requested.
    """
    if isinstance(cols, str):
        # A bare string is itself iterable; without this guard "id" would be
        # checked as the two columns "i" and "d".
        cols = (cols,)

    # Build the lookup once so each membership test is O(1).
    available = set(df.columns)
    missing = [name for name in cols if name not in available]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
def rename_if_present( df: polars.dataframe.frame.DataFrame, mapping: Mapping[str, str]) -> polars.dataframe.frame.DataFrame:
def rename_if_present(df: pl.DataFrame, mapping: Mapping[str, str]) -> pl.DataFrame:
    """Apply only those renames whose source column exists in *df*.

    Args:
        df: Frame to rename columns on.
        mapping: ``old -> new`` column names; keys not in ``df.columns`` are
            skipped.

    Returns:
        The result of ``df.rename`` with the applicable subset, or ``df``
        itself when no key matches.
    """
    applicable = {}
    for source, target in mapping.items():
        if source in df.columns:
            applicable[source] = target

    if not applicable:
        return df
    return df.rename(applicable)

Rename columns that exist; ignore missing keys.