Ultra-fast Genomic Signal Processing

Compute AUC, mean, max, and more from BigWig & BED in seconds—no compromises.

Key Features

Signal Computations

Compute AUC, mean, max, min, median, stddev, coverage—and even non-zero means—over BED regions.

Multi-Track Support

Process dozens of BigWig tracks in parallel, with each result automatically merged to minimize I/O.

Normalization & Comparison

Built-in Z-score, log2, quantile normalization, and fold-change/difference/comparison utilities.

Statistical Tests

One-way and two-way ANOVA, t-tests, and Mann–Whitney U tests—powered by statsmodels and SciPy.

Supercharged Performance

Dynamic region merging, numpy vectorization, and multithreading—benchmarks show 10× speedups vs. pyBigWig.

Extensible API

Easily drop in custom normalization, comparison, or signal-processing routines via plugin hooks.

Quickstart

Install the library and get started with just a few lines of code.

💾

Install

    pip install signalframe
⚙️

Example Usage

    import signalframe as sf

    # Load a BED file
    bed = sf.load_bed("peaks.bed")

    # Compute AUC from a BigWig file
    result = sf.compute_signal("sample.bw", bed, method="auc")

    # Save results
    sf.save_bed(result, "output.bed")

API Reference

load_bed(bed_path, extra_col_names=None)

    import signalframe as sf

    # Load BED file with default column naming
    bed_df = sf.load_bed("regions.bed")
    print(bed_df.head())

    # Load BED file with custom column names
    bed_df = sf.load_bed("regions.bed", extra_col_names=["name", "score", "strand"])
    print(bed_df.head())

save_bed(df, output_path)

    import signalframe as sf

    # Save DataFrame as BED file
    sf.save_bed(bed_df, "output.bed")

expand_bed_regions(df, method, expand_bp)

    import signalframe as sf

    # Example 1: Expand regions by 100 bp around the center
    expanded_df = sf.expand_bed_regions(bed_df, method="center", expand_bp=100)

    # Example 2: Expand regions by 50 bp from the edges
    expanded_df = sf.expand_bed_regions(bed_df, method="edge", expand_bp=50)

    # Example 3: Omit method and expand_bp to return the regions unmodified
    unchanged_df = sf.expand_bed_regions(bed_df)

compute_signal(bigwig_path, bed_df, method)

    import signalframe as sf

    # Load BED DataFrame
    bed_df = sf.load_bed("regions.bed")

    # Compute area under the curve (AUC) for each region
    bed_df_with_auc = sf.compute_signal("H3K27ac.bw", bed_df, method="auc")

    # Compute mean signal
    bed_df_with_mean = sf.compute_signal("ATAC_signal.bw", bed_df, method="mean")

compute_signal_multi(bigwig_paths, bed_df, method)

    import signalframe as sf

    # Load a BED file
    bed_df = sf.load_bed("peaks.bed")

    # Define BigWig files
    bw_files = ["H3K27ac.bw", "ATAC.bw", "H3K4me1.bw"]

    # Compute AUC for each region in each file
    result_df = sf.compute_signal_multi(bw_files, bed_df, method="auc")

    # Compute mean signal
    result_df = sf.compute_signal_multi(bw_files, bed_df, method="mean")

normalize_signal(df, columns, method)

    import signalframe as sf

    # Normalize by region length (AUC per bp)
    df = sf.normalize_signal(df, columns="H3K27ac_auc", method="length")

    # Z-score normalization
    df = sf.normalize_signal(df, columns=["H3K27ac_auc", "ATAC_auc"], method="zscore")

    # Log2 normalization with pseudocount
    df = sf.normalize_signal(df, columns="H3K27ac_auc", method="log2", pseudocount=0.01)

    # Min-max scaling
    df = sf.normalize_signal(df, columns="ATAC_auc", method="minmax")

    # Quantile normalization using median profile
    df = sf.normalize_signal(
      df,
      columns=["H3K27ac_auc", "ATAC_auc"],
      method="quantile",
      reference_matrix="median"
    )

compare_tracks(df, reference, comparisons, mode)

    import signalframe as sf

    # Compare H3K27ac signal against the ATAC reference using fold change and log2FC
    df = sf.compare_tracks(df, reference="ATAC_auc", comparisons="H3K27ac_auc", mode=["fold_change", "log2FC"])

    # Compare against multiple tracks using all metrics
    df = sf.compare_tracks(df, reference="ATAC_auc", comparisons=["H3K27ac_auc", "H3K4me1_auc"])

sort_signal_df(df, sort_by, ascending=True)

    import signalframe as sf

    # Sort by genomic coordinates
    sorted_df = sf.sort_signal_df(df, sort_by="genomic_position")

    # Sort by signal intensity (e.g., AUC column)
    sorted_df = sf.sort_signal_df(df, sort_by="H3K27ac_auc", ascending=False)

compare_signal_groups(df, group_col, value_col, test)

    import signalframe as sf

    # Compare mean signal between FoxP3 and input groups using t-test
    pval = sf.compare_signal_groups(df, group_col="group", value_col="H3K27ac_auc", test="t-test")

    # Use Mann–Whitney U test instead
    pval = sf.compare_signal_groups(df, group_col="group", value_col="ATAC_auc", test="mannwhitney")

run_one_way_anova(df, factor, response)

    import signalframe as sf

    # Run one-way ANOVA comparing H3K27ac signal across tissue types
    anova_table = sf.run_one_way_anova(df, factor="tissue", response="H3K27ac_auc")

    # Also retrieve the fitted model for further inspection
    anova_table, model = sf.run_one_way_anova(df, factor="condition", response="ATAC_signal", return_model=True)

run_two_way_anova(df, factor1, factor2, response)

    import signalframe as sf

    # Run two-way ANOVA including interaction
    anova_table = sf.run_two_way_anova(df, factor1="tissue", factor2="condition", response="H3K27ac_auc")

    # Run without interaction and return the fitted model as well
    anova_table, model = sf.run_two_way_anova(
      df,
      factor1="cell_type",
      factor2="treatment",
      response="signal",
      include_interaction=False,
      return_model=True
    )

plot_signal_region(bigwig_paths, chrom, start, end)

    import signalframe as sf

    # Plot a single BigWig track
    sf.plot_signal_region("ATAC.bw", chrom="chr15", start=100000, end=101000)

    # Plot multiple tracks with custom labels and a title
    sf.plot_signal_region(
      bigwig_paths=["H3K27ac.bw", "ATAC.bw"],
      chrom="chr7",
      start=50000,
      end=52000,
      labels=["H3K27ac", "ATAC-seq"],
      title="Signal at chr7:50,000-52,000"
    )

plot_signals_from_bed(bed_df, bigwig_paths)

    import signalframe as sf

    # Plot the first 10 peaks in the BED DataFrame
    sf.plot_signals_from_bed(bed_df, bigwig_paths="H3K27ac.bw")

    # Plot up to 5 regions with two BigWig tracks, shared y-axis turned off
    sf.plot_signals_from_bed(
      bed_df,
      bigwig_paths=["ATAC.bw", "H3K27ac.bw"],
      max_plots=5,
      shared_y=False,
      labels=["ATAC-seq", "H3K27ac"]
    )

Read the Paper

Learn about the algorithms, benchmarks, and biological applications behind SignalFrame.

View PDF