Key Features
Signal Computations
Compute AUC, mean, max, min, median, stddev, coverage—and even non-zero means—over BED regions.
Multi-Track Support
Process dozens of BigWig tracks in parallel; per-track results are merged automatically to minimize I/O.
Normalization & Comparison
Built-in Z-score, log2, and quantile normalization, plus fold-change/difference/comparison utilities.
Statistical Tests
One-way & two-way ANOVA, t-tests, and Mann–Whitney U—powered by statsmodels & SciPy.
Supercharged Performance
Dynamic region merging, NumPy vectorization, and multithreading—benchmarks show 10× speedups vs. pyBigWig.
Extensible API
Easily drop in custom normalization, comparison, or signal-processing routines via plugin hooks.
Quickstart
Install the library and get started with just a few lines of code.
Install
pip install signalframe
Example Usage
import signalframe as sf
# Load a BED file
bed = sf.load_bed("peaks.bed")
# Compute AUC from a BigWig file
result = sf.compute_signal("sample.bw", bed, method="auc")
# Save results
sf.save_bed(result, "output.bed")
API Reference
load_bed(bed_path, extra_col_names=None)
import signalframe as sf
# Load BED file with default column naming
bed_df = sf.load_bed("regions.bed")
print(bed_df.head())
# Load BED file with custom column names
bed_df = sf.load_bed("regions.bed", extra_col_names=["name", "score", "strand"])
print(bed_df.head())
save_bed(df, output_path)
import signalframe as sf
# Save DataFrame as BED file
sf.save_bed(bed_df, "output.bed")
expand_bed_regions(df, method, expand_bp)
import signalframe as sf
# Example 1: Expand regions by 100 bp around the center
expanded_df = sf.expand_bed_regions(bed_df, method="center", expand_bp=100)
# Example 2: Expand regions by 50 bp from the edges
expanded_df = sf.expand_bed_regions(bed_df, method="edge", expand_bp=50)
# Example 3: Return unmodified regions
unchanged_df = sf.expand_bed_regions(bed_df)
compute_signal(bigwig_path, bed_df, method)
import signalframe as sf
# Load BED DataFrame
bed_df = sf.load_bed("regions.bed")
# Compute area under the curve (AUC) for each region
bed_df_with_auc = sf.compute_signal("H3K27ac.bw", bed_df, method="auc")
# Compute mean signal
bed_df_with_mean = sf.compute_signal("ATAC_signal.bw", bed_df, method="mean")
compute_signal_multi(bigwig_paths, bed_df, method)
import signalframe as sf
# Load a BED file
bed_df = sf.load_bed("peaks.bed")
# Define BigWig files
bw_files = ["H3K27ac.bw", "ATAC.bw", "H3K4me1.bw"]
# Compute AUC for each region in each file
result_df = sf.compute_signal_multi(bw_files, bed_df, method="auc")
# Compute mean signal
result_df = sf.compute_signal_multi(bw_files, bed_df, method="mean")
normalize_signal(df, columns, method)
import signalframe as sf
# Normalize by region length (AUC per bp)
df = sf.normalize_signal(df, columns="H3K27ac_auc", method="length")
# Z-score normalization
df = sf.normalize_signal(df, columns=["H3K27ac_auc", "ATAC_auc"], method="zscore")
# Log2 normalization with pseudocount
df = sf.normalize_signal(df, columns="H3K27ac_auc", method="log2", pseudocount=0.01)
# Min-max scaling
df = sf.normalize_signal(df, columns="ATAC_auc", method="minmax")
# Quantile normalization using median profile
df = sf.normalize_signal(
df,
columns=["H3K27ac_auc", "ATAC_auc"],
method="quantile",
reference_matrix="median"
)
compare_tracks(df, reference, comparisons, mode)
import signalframe as sf
# Compare ATAC signal to H3K27ac using fold change and log2FC
df = sf.compare_tracks(df, reference="ATAC_auc", comparisons="H3K27ac_auc", mode=["fold_change", "log2FC"])
# Compare against multiple tracks using all metrics
df = sf.compare_tracks(df, reference="ATAC_auc", comparisons=["H3K27ac_auc", "H3K4me1_auc"])
sort_signal_df(df, sort_by, ascending=True)
import signalframe as sf
# Sort by genomic coordinates
sorted_df = sf.sort_signal_df(df, sort_by="genomic_position")
# Sort by signal intensity (e.g., AUC column)
sorted_df = sf.sort_signal_df(df, sort_by="H3K27ac_auc", ascending=False)
compare_signal_groups(df, group_col, value_col, test)
import signalframe as sf
# Compare mean signal between FoxP3 and input groups using t-test
pval = sf.compare_signal_groups(df, group_col="group", value_col="H3K27ac_auc", test="t-test")
# Use Mann–Whitney U test instead
pval = sf.compare_signal_groups(df, group_col="group", value_col="ATAC_auc", test="mannwhitney")
run_one_way_anova(df, factor, response)
import signalframe as sf
# Run one-way ANOVA comparing H3K27ac signal across tissue types
anova_table = sf.run_one_way_anova(df, factor="tissue", response="H3K27ac_auc")
# Also retrieve the fitted model for further inspection
anova_table, model = sf.run_one_way_anova(df, factor="condition", response="ATAC_signal", return_model=True)
run_two_way_anova(df, factor1, factor2, response)
import signalframe as sf
# Run two-way ANOVA including interaction
anova_table = sf.run_two_way_anova(df, factor1="tissue", factor2="condition", response="H3K27ac_auc")
# Run without interaction and return the fitted model as well
anova_table, model = sf.run_two_way_anova(
df,
factor1="cell_type",
factor2="treatment",
response="signal",
include_interaction=False,
return_model=True
)
plot_signal_region(bigwig_paths, chrom, start, end)
import signalframe as sf
# Plot a single BigWig track
sf.plot_signal_region("ATAC.bw", chrom="chr15", start=100000, end=101000)
# Plot multiple tracks with custom labels and a title
sf.plot_signal_region(
bigwig_paths=["H3K27ac.bw", "ATAC.bw"],
chrom="chr7",
start=50000,
end=52000,
labels=["H3K27ac", "ATAC-seq"],
title="Signal at chr7:50,000-52,000"
)
plot_signals_from_bed(bed_df, bigwig_paths)
import signalframe as sf
# Plot the first 10 peaks in the BED DataFrame
sf.plot_signals_from_bed(bed_df, bigwig_paths="H3K27ac.bw")
# Plot up to 5 regions with two BigWig tracks, shared y-axis turned off
sf.plot_signals_from_bed(
bed_df,
bigwig_paths=["ATAC.bw", "H3K27ac.bw"],
max_plots=5,
shared_y=False,
labels=["ATAC-seq", "H3K27ac"]
)
Read the Paper
Learn about the algorithms, benchmarks, and biological applications behind SignalFrame.
View PDF