Skip to content
Merged
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ You should customize your file-reading method to suit your specific application.
{
using var afr = new NAudio.Wave.AudioFileReader(filePath);
int sampleRate = afr.WaveFormat.SampleRate;
int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8);
int bytesPerSample = afr.WaveFormat.BitsPerSample / 8;
int sampleCount = (int)(afr.Length / bytesPerSample);
int channelCount = afr.WaveFormat.Channels;
var audio = new List<double>(sampleCount);
var buffer = new float[sampleRate * channelCount];
Expand Down
15 changes: 15 additions & 0 deletions dev/python/readwav.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Print the file name, sample rate, and sample count of every WAV file in the
repository's data folder, as read by SciPy.

Example values noted by the original author:
sample rate: 44100
values: 166671
value 12345: 4435
"""
from scipy.io import wavfile
import pathlib

# Resolve the script location to an absolute path so the data folder is found
# regardless of the working directory or how the script was launched.
PATH_HERE = pathlib.Path(__file__).resolve().parent
PATH_DATA = PATH_HERE.joinpath("../../data")

if __name__ == "__main__":
    # glob() already yields paths that include PATH_DATA, so they are used
    # directly; joining them onto PATH_DATA again would duplicate the prefix
    # whenever PATH_DATA is relative. Sorting makes the output order stable.
    for wavFilePath in sorted(PATH_DATA.glob("*.wav")):
        samplerate, data = wavfile.read(wavFilePath)
        print(f"{wavFilePath.name}, {samplerate}, {len(data)}")
9 changes: 8 additions & 1 deletion src/Spectrogram.Tests/AudioFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ namespace Spectrogram.Tests
{
public static class AudioFile
{
/// <summary>
/// Use NAudio to read the contents of a WAV file.
/// </summary>
public static (double[] audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000)
{
using var afr = new NAudio.Wave.AudioFileReader(filePath);
int sampleRate = afr.WaveFormat.SampleRate;
int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8);
int bytesPerSample = afr.WaveFormat.BitsPerSample / 8;
int sampleCount = (int)afr.Length / bytesPerSample;
int channelCount = afr.WaveFormat.Channels;
var audio = new List<double>(sampleCount);
var buffer = new float[sampleRate * channelCount];
Expand All @@ -21,6 +25,9 @@ public static (double[] audio, int sampleRate) ReadWAV(string filePath, double m
return (audio.ToArray(), sampleRate);
}

/// <summary>
/// Use MP3Sharp to read the contents of an MP3 file.
/// </summary>
public static double[] ReadMP3(string filePath, int bufferSize = 4096)
{
List<double> audio = new List<double>();
Expand Down
27 changes: 27 additions & 0 deletions src/Spectrogram.Tests/AudioFileTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Text;

namespace Spectrogram.Tests
{
class AudioFileTests
{
    /// <summary>
    /// Compare the sample rate and per-channel sample count returned by AudioFile.ReadWAV()
    /// against values read by Python's SciPy module (see script in /dev folder).
    /// </summary>
    /// <param name="filename">Name of a WAV file inside the repository's data folder</param>
    /// <param name="knownRate">Expected sample rate (Hz)</param>
    /// <param name="knownLength">Expected number of samples per channel</param>
    /// <param name="channels">Number of channels in the file</param>
    [TestCase("cant-do-that-44100.wav", 44_100, 166_671, 1)]
    [TestCase("03-02-03-01-02-01-19.wav", 48_000, 214_615, 1)]
    [TestCase("qrss-10min.wav", 6_000, 3_600_000, 1)]
    [TestCase("cant-do-that-11025-stereo.wav", 11_025, 41_668, 2)]
    [TestCase("asehgal-original.wav", 40_000, 1_600_000, 1)]
    public void Test_AudioFile_LengthAndSampleRate(string filename, int knownRate, int knownLength, int channels)
    {
        // Anchor the relative path to the folder containing the test assembly rather than
        // the process working directory, which differs between test runners.
        string filePath = System.IO.Path.Combine(
            TestContext.CurrentContext.TestDirectory,
            "../../../../../data",
            filename);

        (double[] audio, int sampleRate) = AudioFile.ReadWAV(filePath);

        Assert.AreEqual(knownRate, sampleRate);

        // the returned array holds the samples of all channels, so divide to get the per-channel length
        Assert.AreEqual(knownLength, audio.Length / channels);
    }
}
}
177 changes: 172 additions & 5 deletions src/Spectrogram/SpectrogramGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,104 @@ namespace Spectrogram
{
public class SpectrogramGenerator
{
/// <summary>
/// Width of the spectrogram image in pixel columns (one column per FFT held in memory)
/// </summary>
public int Width => ffts.Count;

/// <summary>
/// Height of the spectrogram image in pixel rows (frequency bins), as defined by the settings
/// </summary>
public int Height => settings.Height;

/// <summary>
/// Number of samples consumed by each FFT (must be a power of 2)
/// </summary>
public int FftSize => settings.FftSize;

/// <summary>
/// Vertical resolution in Hz per pixel row (frequency bin size depends on FftSize and SampleRate)
/// </summary>
public double HzPerPx => settings.HzPerPixel;

/// <summary>
/// Horizontal resolution in seconds per pixel column (depends on StepSize)
/// </summary>
public double SecPerPx => settings.StepLengthSec;

/// <summary>
/// Number of FFTs that remain to be processed for data which has been added but not yet analyzed.
/// The value is zero or negative when fewer than FftSize + StepSize samples are waiting.
/// </summary>
public int FftsToProcess => (newAudio.Count - settings.FftSize) / settings.StepSize;

/// <summary>
/// Total number of FFT steps processed so far.
/// The setter is private, so the count can only be changed from inside this class.
/// </summary>
public int FftsProcessed { get; private set; }

/// <summary>
/// Index of the pixel column which will be populated next. Location of vertical line for wrap-around displays.
/// Returns 0 while the spectrogram holds no columns (Width is 0) instead of throwing a DivideByZeroException.
/// </summary>
public int NextColumnIndex => Width > 0 ? (FftsProcessed + rollOffset) % Width : 0;

/// <summary>
/// Offset (Hz) added to displayed frequency axis tick labels.
/// Reading and writing this property goes straight through to the settings object.
/// </summary>
public int OffsetHz
{
    get => settings.OffsetHz;
    set => settings.OffsetHz = value;
}

/// <summary>
/// Sample rate of the audio data (samples per second)
/// </summary>
public int SampleRate => settings.SampleRate;

/// <summary>
/// Number of samples the analysis window advances after each FFT is processed.
/// This value controls the horizontal resolution of the spectrogram.
/// </summary>
public int StepSize => settings.StepSize;

/// <summary>
/// Upper frequency limit (Hz). The spectrogram is trimmed to cut off frequencies above this value.
/// </summary>
public double FreqMax => settings.FreqMax;

/// <summary>
/// Lower frequency limit (Hz). The spectrogram is trimmed to cut off frequencies below this value.
/// </summary>
public double FreqMin => settings.FreqMin;

private readonly Settings settings;
private readonly List<double[]> ffts = new List<double[]>();
private readonly List<double> newAudio = new List<double>();
private readonly List<double> newAudio;
private Colormap cmap = Colormap.Viridis;

public SpectrogramGenerator(int sampleRate, int fftSize, int stepSize,
double minFreq = 0, double maxFreq = double.PositiveInfinity,
int? fixedWidth = null, int offsetHz = 0)
/// <summary>
/// Instantiate a spectrogram generator.
/// This module calculates the FFT over a moving window as data comes in.
/// Use the Add() method to load new data and process it as it arrives.
/// </summary>
/// <param name="sampleRate">Number of samples per second (Hz)</param>
/// <param name="fftSize">Number of samples to use for each FFT operation. This value must be a power of 2.</param>
/// <param name="stepSize">Number of samples to step forward after each FFT</param>
/// <param name="minFreq">Frequency data lower than this value (Hz) will not be stored</param>
/// <param name="maxFreq">Frequency data higher than this value (Hz) will not be stored</param>
/// <param name="fixedWidth">If given, SetFixedWidth() is called with this value so the spectrogram output is always sized to this width (column count)</param>
/// <param name="offsetHz">This value will be added to displayed frequency axis tick labels</param>
/// <param name="initialAudioList">Optional list of audio samples to start with (alternative to calling Add() later).
/// The list is stored by reference rather than copied, so later Add() calls append to it.
/// NOTE(review): this constructor does not itself call Process() on the list; confirm where the initial data gets analyzed.</param>
public SpectrogramGenerator(
int sampleRate,
int fftSize,
int stepSize,
double minFreq = 0,
double maxFreq = double.PositiveInfinity,
int? fixedWidth = null,
int offsetHz = 0,
List<double> initialAudioList = null)
{
settings = new Settings(sampleRate, fftSize, stepSize, minFreq, maxFreq, offsetHz);

newAudio = initialAudioList ?? new List<double>();

if (fixedWidth.HasValue)
SetFixedWidth(fixedWidth.Value);
}
Expand All @@ -56,11 +129,18 @@ public override string ToString()
$"overlap: {settings.StepOverlapFrac * 100:N0}%";
}

/// <summary>
/// Choose the colormap applied to future renders.
/// Passing null leaves the current colormap unchanged.
/// </summary>
/// <param name="cmap">Colormap to use, or null to keep the existing one</param>
public void SetColormap(Colormap cmap)
{
    if (cmap is null)
        return;

    this.cmap = cmap;
}

/// <summary>
/// Load a custom window kernel to multiply against each FFT sample prior to processing.
/// Windows must be at least the length of FftSize and typically have a sum of 1.0.
/// </summary>
public void SetWindow(double[] newWindow)
{
if (newWindow.Length > settings.FftSize)
Expand All @@ -82,19 +162,36 @@ public void AddCircular(float[] values) { }
/// <summary>
/// Obsolete stub with an empty body. The Obsolete attribute is flagged as an error,
/// so any code calling this method fails to compile and is directed to Add().
/// </summary>
[Obsolete("use the Add() method", true)]
public void AddScroll(float[] values) { }

public void Add(double[] audio, bool process = true)
/// <summary>
/// Append new audio samples to the buffer of data awaiting analysis.
/// </summary>
/// <param name="audio">Samples to append</param>
/// <param name="process">If true, Process() is called immediately after the samples are appended</param>
public void Add(IEnumerable<double> audio, bool process = true)
{
    newAudio.AddRange(audio);

    if (!process)
        return;

    Process();
}

/// <summary>
/// The roll offset is used to calculate NextColumnIndex and can be set to a positive number
/// to begin adding new columns to the center of the spectrogram.
/// This can also be used to artificially move the next column index to zero even though some
/// data has already been accumulated.
/// </summary>
private int rollOffset = 0;

/// <summary>
/// Reset the next column index so the next processed FFT appears at the given column
/// (the far left of the spectrogram by default).
/// </summary>
/// <param name="offset">Column index at which the next processed FFT should appear (wrapped by Width)</param>
public void RollReset(int offset = 0) => rollOffset = offset - FftsProcessed;

/// <summary>
/// Perform FFT analysis on all unprocessed data
/// </summary>
public double[][] Process()
{
if (FftsToProcess < 1)
Expand Down Expand Up @@ -127,6 +224,10 @@ public double[][] Process()
return newFfts;
}

/// <summary>
/// Return a list of the mel-scaled FFTs contained in this spectrogram
/// </summary>
/// <param name="melBinCount">Total number of output bins to use. Choose a value significantly smaller than Height.</param>
public List<double[]> GetMelFFTs(int melBinCount)
{
if (settings.FreqMin != 0)
Expand All @@ -139,15 +240,44 @@ public List<double[]> GetMelFFTs(int melBinCount)
return fftsMel;
}

/// <summary>
/// Render the FFTs currently stored in memory as a spectrogram bitmap using the active colormap.
/// </summary>
/// <param name="intensity">Fixed multiplier applied to the output to change its brightness.</param>
/// <param name="dB">If true, output will be log-transformed.</param>
/// <param name="dBScale">Multiplier applied before log transformation when dB scaling is in use.</param>
/// <param name="roll">Behavior of the spectrogram when it is full of data.
/// Roll (true) adds new columns on the left overwriting the oldest ones.
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
/// <returns>The bitmap produced by Image.GetBitmap()</returns>
public Bitmap GetBitmap(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false)
{
    return Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex);
}

/// <summary>
/// Render a Mel-scaled spectrogram bitmap from the FFTs stored in memory.
/// </summary>
/// <param name="melBinCount">Total number of output bins to use. Choose a value significantly smaller than Height.</param>
/// <param name="intensity">Fixed multiplier applied to the output to change its brightness.</param>
/// <param name="dB">If true, output will be log-transformed.</param>
/// <param name="dBScale">Multiplier applied before log transformation when dB scaling is in use.</param>
/// <param name="roll">Behavior of the spectrogram when it is full of data.
/// Roll (true) adds new columns on the left overwriting the oldest ones.
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
/// <returns>The bitmap produced by Image.GetBitmap() for the mel-scaled FFTs</returns>
public Bitmap GetBitmapMel(int melBinCount = 25, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false)
{
    // convert to the mel scale first, then render exactly like a regular spectrogram
    var melFfts = GetMelFFTs(melBinCount);
    return Image.GetBitmap(melFfts, cmap, intensity, dB, dBScale, roll, NextColumnIndex);
}

/// <summary>
/// Obsolete stub with an empty body. The Obsolete attribute is flagged as an error,
/// so any code calling this method fails to compile and is directed to SaveImage().
/// </summary>
[Obsolete("use SaveImage()", true)]
public void SaveBitmap(Bitmap bmp, string fileName) { }

/// <summary>
/// Generate the spectrogram and save it as an image file.
/// </summary>
/// <param name="fileName">Path of the file to save.</param>
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
/// <param name="dB">If true, output will be log-transformed.</param>
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
/// <param name="roll">Behavior of the spectrogram when it is full of data.
/// Roll (true) adds new columns on the left overwriting the oldest ones.
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
public void SaveImage(string fileName, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false)
{
if (ffts.Count == 0)
Expand All @@ -170,6 +300,15 @@ public void SaveImage(string fileName, double intensity = 1, bool dB = false, do
Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex).Save(fileName, fmt);
}

/// <summary>
/// Create and return a spectrogram bitmap from the FFTs stored in memory.
/// The output will be scaled-down vertically by binning according to a reduction factor and keeping the brightest pixel value in each bin.
/// </summary>
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
/// <param name="dB">If true, output will be log-transformed.</param>
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
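/// <param name="roll">Behavior of the spectrogram when it is full of data.
/// Roll (true) adds new columns on the left overwriting the oldest ones.
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
/// <param name="reduction">Reduction factor used for vertical binning (the brightest pixel value in each bin is kept).</param>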
public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false, int reduction = 4)
{
List<double[]> ffts2 = new List<double[]>();
Expand All @@ -185,14 +324,25 @@ public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale
return Image.GetBitmap(ffts2, cmap, intensity, dB, dBScale, roll, NextColumnIndex);
}

/// <summary>
/// Export spectrogram data using the Spectrogram File Format (SFF).
/// The ".sff" extension is appended to the path when it is not already present (case-insensitive).
/// </summary>
/// <param name="filePath">Path of the file to save</param>
/// <param name="melBinCount">Value forwarded to the SFF constructor</param>
public void SaveData(string filePath, int melBinCount = 0)
{
    bool hasSffExtension = filePath.EndsWith(".sff", StringComparison.OrdinalIgnoreCase);
    string outputPath = hasSffExtension ? filePath : filePath + ".sff";

    var sff = new SFF(this, melBinCount);
    sff.Save(outputPath);
}

/// <summary>
/// Defines the total number of FFTs (spectrogram columns) to store in memory. Determines Width.
/// </summary>
private int fixedWidth = 0;

/// <summary>
/// Configure the Spectrogram to maintain a fixed number of pixel columns.
/// Zeros will be added to pad existing data to achieve this width, and extra columns will be deleted.
/// </summary>
public void SetFixedWidth(int width)
{
fixedWidth = width;
Expand All @@ -212,11 +362,21 @@ private void PadOrTrimForFixedWidth()
}
}

/// <summary>
/// Create a vertical image containing ticks and tick labels for the frequency axis.
/// The work is delegated to Scale.Vertical() using this generator's settings.
/// </summary>
/// <param name="width">size (pixels)</param>
/// <param name="offsetHz">number to add to each tick label</param>
/// <param name="tickSize">length of each tick mark (pixels)</param>
/// <param name="reduction">bin size for vertical data reduction</param>
/// <returns>The bitmap returned by Scale.Vertical()</returns>
public Bitmap GetVerticalScale(int width, int offsetHz = 0, int tickSize = 3, int reduction = 1) =>
    Scale.Vertical(width, settings, offsetHz, tickSize, reduction);

/// <summary>
/// Return the vertical position (pixel units) for the given frequency
/// </summary>
public int PixelY(double frequency, int reduction = 1)
{
int pixelsFromZeroHz = (int)(settings.PxPerHz * frequency / reduction);
Expand All @@ -225,11 +385,18 @@ public int PixelY(double frequency, int reduction = 1)
return pixelRow - 1;
}

/// <summary>
/// Return the list of FFTs in memory underlying the spectrogram.
/// The internal list itself is returned (not a copy).
/// </summary>
public List<double[]> GetFFTs() => ffts;

/// <summary>
/// Return frequency and magnitude of the dominant frequency.
/// </summary>
/// <param name="latestFft">If true, only the latest FFT will be assessed.</param>
public (double freqHz, double magRms) GetPeak(bool latestFft = true)
{
if (ffts.Count == 0)
Expand Down