hoodini.utils.seq_io

FASTA helpers and simple file transforms.

 1"""FASTA helpers and simple file transforms."""
 2
 3from __future__ import annotations
 4
 5from pathlib import Path
 6
 7import polars as pl
 8from Bio import SeqIO
 9
10
def to_fasta(df: pl.DataFrame, id_col: str, seq_col: str, path: str | Path) -> None:
    """Write a Polars DataFrame to FASTA.

    Each row becomes one record: a ``>`` header line holding the value of
    ``id_col``, followed by a single line holding the value of ``seq_col``.
    Sequences are written on one line (no wrapping).

    Args:
        df: Frame to export; rows are read with ``iter_rows(named=True)``.
        id_col: Name of the column supplying the record header.
        seq_col: Name of the column supplying the sequence.
        path: Destination file; opened in ``"w"`` mode, so it is created or
            overwritten.

    Raises:
        KeyError: If ``id_col`` or ``seq_col`` is not a key of a row.
        OSError: If the destination cannot be opened for writing.
    """
    dest = Path(path)
    # Pin the encoding: the default for text mode depends on the locale, so
    # non-ASCII headers could fail or be encoded differently per machine.
    with dest.open("w", encoding="utf-8") as f:
        # One writelines call over a generator instead of a write per row.
        f.writelines(
            f">{row[id_col]}\n{row[seq_col]}\n" for row in df.iter_rows(named=True)
        )
17
18
def _df_to_fasta(self: pl.DataFrame, id_col: str, seq_col: str, path: str | Path) -> None:
    """Method-style adapter that forwards to ``to_fasta`` with ``self`` as the frame."""
    to_fasta(df=self, id_col=id_col, seq_col=seq_col, path=path)


# Attach the adapter to the DataFrame class so that frames can be exported
# with ``df.to_fasta(id_col, seq_col, path)``.
pl.DataFrame.to_fasta = _df_to_fasta  # type: ignore[attr-defined]
24
25
def read_fasta(filename: str | Path) -> pl.DataFrame:
    """Read a FASTA file into a Polars DataFrame.

    Args:
        filename: Path of the FASTA file; it is passed to ``SeqIO.parse`` as
            a string.

    Returns:
        A frame with two string columns: ``id`` (each record's ``record.id``)
        and ``sequence`` (each record's sequence converted with ``str``).
        A file with no records yields a zero-row frame that still has both
        columns.
    """
    ids: list[str] = []
    sequences: list[str] = []
    for record in SeqIO.parse(str(filename), "fasta"):
        ids.append(record.id)
        sequences.append(str(record.seq))
    # Explicit schema: without it an input with no records produces a frame
    # with no columns at all, which breaks callers that select "id" or
    # "sequence".
    return pl.DataFrame(
        {"id": ids, "sequence": sequences},
        schema={"id": pl.Utf8, "sequence": pl.Utf8},
    )
def to_fasta(df: polars.dataframe.frame.DataFrame, id_col: str, seq_col: str, path: str | pathlib.Path) -> None:
def to_fasta(df: pl.DataFrame, id_col: str, seq_col: str, path: str | Path) -> None:
    """Write a Polars DataFrame to FASTA.

    Each row becomes one record: a ``>`` header line holding the value of
    ``id_col``, followed by a single line holding the value of ``seq_col``.
    Sequences are written on one line (no wrapping).

    Args:
        df: Frame to export; rows are read with ``iter_rows(named=True)``.
        id_col: Name of the column supplying the record header.
        seq_col: Name of the column supplying the sequence.
        path: Destination file; opened in ``"w"`` mode, so it is created or
            overwritten.

    Raises:
        KeyError: If ``id_col`` or ``seq_col`` is not a key of a row.
        OSError: If the destination cannot be opened for writing.
    """
    dest = Path(path)
    # Pin the encoding: the default for text mode depends on the locale, so
    # non-ASCII headers could fail or be encoded differently per machine.
    with dest.open("w", encoding="utf-8") as f:
        # One writelines call over a generator instead of a write per row.
        f.writelines(
            f">{row[id_col]}\n{row[seq_col]}\n" for row in df.iter_rows(named=True)
        )

Write a Polars DataFrame to FASTA.

def read_fasta(filename: str | pathlib.Path) -> polars.dataframe.frame.DataFrame:
def read_fasta(filename: str | Path) -> pl.DataFrame:
    """Read a FASTA file into a Polars DataFrame.

    Args:
        filename: Path of the FASTA file; it is passed to ``SeqIO.parse`` as
            a string.

    Returns:
        A frame with two string columns: ``id`` (each record's ``record.id``)
        and ``sequence`` (each record's sequence converted with ``str``).
        A file with no records yields a zero-row frame that still has both
        columns.
    """
    ids: list[str] = []
    sequences: list[str] = []
    for record in SeqIO.parse(str(filename), "fasta"):
        ids.append(record.id)
        sequences.append(str(record.seq))
    # Explicit schema: without it an input with no records produces a frame
    # with no columns at all, which breaks callers that select "id" or
    # "sequence".
    return pl.DataFrame(
        {"id": ids, "sequence": sequences},
        schema={"id": pl.Utf8, "sequence": pl.Utf8},
    )