From f1e9fe59b59c2c48b1dbf5c870da877291e4ef2a Mon Sep 17 00:00:00 2001 From: shirok1 Date: Mon, 30 Aug 2021 04:02:05 +0800 Subject: [PATCH 01/11] change the `sampleCount` expression in sample `ReadWAV` in readme.md to match the eventual size of `audio` --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 83f9206..a4d45ea 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ You should customize your file-reading method to suit your specific application. { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; - int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8); + int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample) * 8; int channelCount = afr.WaveFormat.Channels; var audio = new List(sampleCount); var buffer = new float[sampleRate * channelCount]; From f313dc113e232d0a8a4699808d4f9f7f094f91f9 Mon Sep 17 00:00:00 2001 From: shirok1 Date: Mon, 30 Aug 2021 04:06:03 +0800 Subject: [PATCH 02/11] change the type of the 1st para of `Add` from `double[]` to `IEnumerable` --- README.md | 16 ++++++++-------- src/Spectrogram/SpectrogramGenerator.cs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a4d45ea..63cfd98 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ _"I'm sorry Dave... I'm afraid I can't do that"_ * Source code for the WAV reading method is at the bottom of this page. 
```cs -(double[] audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); sg.SaveImage("hal.png"); @@ -59,7 +59,7 @@ public Form1() Whenever an audio buffer gets filled, add the data to your Spectrogram: ```cs -private void GotNewBuffer(double[] audio) +private void GotNewBuffer(IEnumerable audio) { sg.Add(audio); } @@ -81,7 +81,7 @@ Review the source code of the demo application for additional details and consid This example demonstrates how to convert a MP3 file to a spectrogram image. A sample MP3 audio file in the [data folder](data) contains the audio track from Ken Barker's excellent piano performance of George Frideric Handel's Suite No. 5 in E major for harpsichord ([_The Harmonious Blacksmith_](https://en.wikipedia.org/wiki/The_Harmonious_Blacksmith)). This audio file is included [with permission](dev/Handel%20-%20Air%20and%20Variations.txt), and the [original video can be viewed on YouTube](https://www.youtube.com/watch?v=Mza-xqk770k). ```cs -(double[] audio, int sampleRate) = ReadWAV("song.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("song.wav"); int fftSize = 16384; int targetWidthPx = 3000; @@ -117,7 +117,7 @@ Spectrogram (2993, 817) These examples demonstrate the identical spectrogram analyzed with a variety of different colormaps. Spectrogram colormaps can be changed by calling the `SetColormap()` method: ```cs -(double[] audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 8192, stepSize: 200, maxFreq: 3000); sg.Add(audio); sg.SetColormap(Colormap.Jet); @@ -141,7 +141,7 @@ Cropped Linear Scale (0-3kHz) | Mel Scale (0-22 kHz) Amplitude perception in humans, like frequency perception, is logarithmic. 
Therefore, Mel spectrograms typically display log-transformed spectral power and are presented using Decibel units. ```cs -(double[] audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); @@ -166,7 +166,7 @@ SFF files be saved using `Complex` data format (with real and imaginary values f This example creates a spectrogram but saves it using the SFF file format instead of saving it as an image. The SFF file can then be read in any language. ```cs -(double[] audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 700, maxFreq: 2000); sg.Add(audio); sg.SaveData("hal.sff"); @@ -210,7 +210,7 @@ plt.show() You should customize your file-reading method to suit your specific application. I frequently use the NAudio package to read data from WAV and MP3 files. This function reads audio data from a mono WAV file and will be used for the examples on this page. ```cs -(double[] audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) +(IEnumerable audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; @@ -221,6 +221,6 @@ You should customize your file-reading method to suit your specific application. 
int samplesRead = 0; while ((samplesRead = afr.Read(buffer, 0, buffer.Length)) > 0) audio.AddRange(buffer.Take(samplesRead).Select(x => x * multiplier)); - return (audio.ToArray(), sampleRate); + return (audio, sampleRate); } ``` \ No newline at end of file diff --git a/src/Spectrogram/SpectrogramGenerator.cs b/src/Spectrogram/SpectrogramGenerator.cs index 89af261..ebc9c56 100644 --- a/src/Spectrogram/SpectrogramGenerator.cs +++ b/src/Spectrogram/SpectrogramGenerator.cs @@ -82,7 +82,7 @@ public void AddCircular(float[] values) { } [Obsolete("use the Add() method", true)] public void AddScroll(float[] values) { } - public void Add(double[] audio, bool process = true) + public void Add(IEnumerable audio, bool process = true) { newAudio.AddRange(audio); if (process) From 164b3dbe40f1d3f1a977e948b2d3a50f456b00f9 Mon Sep 17 00:00:00 2001 From: shirok1 Date: Mon, 30 Aug 2021 04:07:40 +0800 Subject: [PATCH 03/11] add an additional optional argument to manually init `newAudio` with an external `List` --- src/Spectrogram/SpectrogramGenerator.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Spectrogram/SpectrogramGenerator.cs b/src/Spectrogram/SpectrogramGenerator.cs index ebc9c56..4d96233 100644 --- a/src/Spectrogram/SpectrogramGenerator.cs +++ b/src/Spectrogram/SpectrogramGenerator.cs @@ -25,15 +25,17 @@ public class SpectrogramGenerator private readonly Settings settings; private readonly List ffts = new List(); - private readonly List newAudio = new List(); + private readonly List newAudio; private Colormap cmap = Colormap.Viridis; public SpectrogramGenerator(int sampleRate, int fftSize, int stepSize, double minFreq = 0, double maxFreq = double.PositiveInfinity, - int? fixedWidth = null, int offsetHz = 0) + int? fixedWidth = null, int offsetHz = 0, List initialAudioList = null) { settings = new Settings(sampleRate, fftSize, stepSize, minFreq, maxFreq, offsetHz); + newAudio = initialAudioList ?? 
new List(); + if (fixedWidth.HasValue) SetFixedWidth(fixedWidth.Value); } From 6640625b5bbf908d354011d75e3314fa18e8e233 Mon Sep 17 00:00:00 2001 From: shirok1 Date: Mon, 30 Aug 2021 04:40:49 +0800 Subject: [PATCH 04/11] change the type of tuple in readme.md to clarify --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 63cfd98..f81aa6d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ _"I'm sorry Dave... I'm afraid I can't do that"_ * Source code for the WAV reading method is at the bottom of this page. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(List audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); sg.SaveImage("hal.png"); @@ -81,7 +81,7 @@ Review the source code of the demo application for additional details and consid This example demonstrates how to convert a MP3 file to a spectrogram image. A sample MP3 audio file in the [data folder](data) contains the audio track from Ken Barker's excellent piano performance of George Frideric Handel's Suite No. 5 in E major for harpsichord ([_The Harmonious Blacksmith_](https://en.wikipedia.org/wiki/The_Harmonious_Blacksmith)). This audio file is included [with permission](dev/Handel%20-%20Air%20and%20Variations.txt), and the [original video can be viewed on YouTube](https://www.youtube.com/watch?v=Mza-xqk770k). ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("song.wav"); +(List audio, int sampleRate) = ReadWAV("song.wav"); int fftSize = 16384; int targetWidthPx = 3000; @@ -117,7 +117,7 @@ Spectrogram (2993, 817) These examples demonstrate the identical spectrogram analyzed with a variety of different colormaps. 
Spectrogram colormaps can be changed by calling the `SetColormap()` method: ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(List audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 8192, stepSize: 200, maxFreq: 3000); sg.Add(audio); sg.SetColormap(Colormap.Jet); @@ -141,7 +141,7 @@ Cropped Linear Scale (0-3kHz) | Mel Scale (0-22 kHz) Amplitude perception in humans, like frequency perception, is logarithmic. Therefore, Mel spectrograms typically display log-transformed spectral power and are presented using Decibel units. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(List audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); @@ -166,7 +166,7 @@ SFF files be saved using `Complex` data format (with real and imaginary values f This example creates a spectrogram but saves it using the SFF file format instead of saving it as an image. The SFF file can then be read in any language. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(List audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 700, maxFreq: 2000); sg.Add(audio); sg.SaveData("hal.sff"); @@ -210,7 +210,7 @@ plt.show() You should customize your file-reading method to suit your specific application. I frequently use the NAudio package to read data from WAV and MP3 files. This function reads audio data from a mono WAV file and will be used for the examples on this page. 
```cs -(IEnumerable audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) +(List audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; From ab2890efde0dd3e2659809fb02e503601da5ca47 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 19:54:53 -0400 Subject: [PATCH 05/11] Revert "change the type of tuple in readme.md to clarify" This reverts commit 6640625b5bbf908d354011d75e3314fa18e8e233. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f81aa6d..63cfd98 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ _"I'm sorry Dave... I'm afraid I can't do that"_ * Source code for the WAV reading method is at the bottom of this page. ```cs -(List audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); sg.SaveImage("hal.png"); @@ -81,7 +81,7 @@ Review the source code of the demo application for additional details and consid This example demonstrates how to convert a MP3 file to a spectrogram image. A sample MP3 audio file in the [data folder](data) contains the audio track from Ken Barker's excellent piano performance of George Frideric Handel's Suite No. 5 in E major for harpsichord ([_The Harmonious Blacksmith_](https://en.wikipedia.org/wiki/The_Harmonious_Blacksmith)). This audio file is included [with permission](dev/Handel%20-%20Air%20and%20Variations.txt), and the [original video can be viewed on YouTube](https://www.youtube.com/watch?v=Mza-xqk770k). 
```cs -(List audio, int sampleRate) = ReadWAV("song.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("song.wav"); int fftSize = 16384; int targetWidthPx = 3000; @@ -117,7 +117,7 @@ Spectrogram (2993, 817) These examples demonstrate the identical spectrogram analyzed with a variety of different colormaps. Spectrogram colormaps can be changed by calling the `SetColormap()` method: ```cs -(List audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 8192, stepSize: 200, maxFreq: 3000); sg.Add(audio); sg.SetColormap(Colormap.Jet); @@ -141,7 +141,7 @@ Cropped Linear Scale (0-3kHz) | Mel Scale (0-22 kHz) Amplitude perception in humans, like frequency perception, is logarithmic. Therefore, Mel spectrograms typically display log-transformed spectral power and are presented using Decibel units. ```cs -(List audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); @@ -166,7 +166,7 @@ SFF files be saved using `Complex` data format (with real and imaginary values f This example creates a spectrogram but saves it using the SFF file format instead of saving it as an image. The SFF file can then be read in any language. ```cs -(List audio, int sampleRate) = ReadWAV("hal.wav"); +(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 700, maxFreq: 2000); sg.Add(audio); sg.SaveData("hal.sff"); @@ -210,7 +210,7 @@ plt.show() You should customize your file-reading method to suit your specific application. I frequently use the NAudio package to read data from WAV and MP3 files. This function reads audio data from a mono WAV file and will be used for the examples on this page. 
```cs -(List audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) +(IEnumerable audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; From 08f42544de23f920823273e819569495db845c0a Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 20:34:36 -0400 Subject: [PATCH 06/11] SpectrogramGenerator: add XML docs --- src/Spectrogram/SpectrogramGenerator.cs | 171 +++++++++++++++++++++++- 1 file changed, 168 insertions(+), 3 deletions(-) diff --git a/src/Spectrogram/SpectrogramGenerator.cs b/src/Spectrogram/SpectrogramGenerator.cs index 4d96233..3dad42d 100644 --- a/src/Spectrogram/SpectrogramGenerator.cs +++ b/src/Spectrogram/SpectrogramGenerator.cs @@ -9,18 +9,70 @@ namespace Spectrogram { public class SpectrogramGenerator { + /// + /// Number of pixel columns (FFT samples) in the spectrogram image + /// public int Width { get { return ffts.Count; } } + + /// + /// Number of pixel rows (frequency bins) in the spectrogram image + /// public int Height { get { return settings.Height; } } + + /// + /// Number of samples to use for each FFT (must be a power of 2) + /// public int FftSize { get { return settings.FftSize; } } + + /// + /// Vertical resolution (frequency bin size depends on FftSize and SampleRate) + /// public double HzPerPx { get { return settings.HzPerPixel; } } + + /// + /// Horizontal resolution (seconds per pixel depends on StepSize) + /// public double SecPerPx { get { return settings.StepLengthSec; } } + + /// + /// Number of FFTs that remain to be processed for data which has been added but not yet analyzed + /// public int FftsToProcess { get { return (newAudio.Count - settings.FftSize) / settings.StepSize; } } + + /// + /// Total number of FFT steps processed + /// public int FftsProcessed { get; private set; } + + /// + /// Index of the pixel column which will be populated next. 
Location of vertical line for wrap-around displays. + /// public int NextColumnIndex { get { return (FftsProcessed + rollOffset) % Width; } } + + /// + /// This value is added to displayed frequency axis tick labels + /// public int OffsetHz { get { return settings.OffsetHz; } set { settings.OffsetHz = value; } } + + /// + /// Number of samples per second + /// public int SampleRate { get { return settings.SampleRate; } } + + /// + /// Number of samples to step forward after each FFT is processed. + /// This value controls the horizontal resolution of the spectrogram. + /// public int StepSize { get { return settings.StepSize; } } + + /// + /// The spectrogram is trimmed to cut off frequencies above this value. + /// public double FreqMax { get { return settings.FreqMax; } } + + /// + /// The spectrogram is trimmed to cut off frequencies below this value. + /// public double FreqMin { get { return settings.FreqMin; } } private readonly Settings settings; @@ -28,9 +80,28 @@ public class SpectrogramGenerator private readonly List newAudio; private Colormap cmap = Colormap.Viridis; - public SpectrogramGenerator(int sampleRate, int fftSize, int stepSize, - double minFreq = 0, double maxFreq = double.PositiveInfinity, - int? fixedWidth = null, int offsetHz = 0, List initialAudioList = null) + /// + /// Instantiate a spectrogram generator. + /// This module calculates the FFT over a moving window as data comes in. + /// Use the Add() method to load new data and process it as it arrives. + /// + /// Number of samples per second (Hz) + /// Number of samples to use for each FFT operation. This value must be a power of 2. 
+ /// Number of samples to step forward + /// Frequency data lower than this value (Hz) will not be stored + /// Frequency data higher than this value (Hz) will not be stored + /// Spectrogram output will always be sized to this width (column count) + /// This value will be added to displayed frequency axis tick labels + /// Analyze this data immediately (alternative to calling Add() later) + public SpectrogramGenerator( + int sampleRate, + int fftSize, + int stepSize, + double minFreq = 0, + double maxFreq = double.PositiveInfinity, + int? fixedWidth = null, + int offsetHz = 0, + List initialAudioList = null) { settings = new Settings(sampleRate, fftSize, stepSize, minFreq, maxFreq, offsetHz); @@ -58,11 +129,18 @@ public override string ToString() $"overlap: {settings.StepOverlapFrac * 100:N0}%"; } + /// + /// Set the colormap to use for future renders + /// public void SetColormap(Colormap cmap) { this.cmap = cmap ?? this.cmap; } + /// + /// Load a custom window kernel to multiply against each FFT sample prior to processing. + /// Windows must be at least the length of FftSize and typically have a sum of 1.0. + /// public void SetWindow(double[] newWindow) { if (newWindow.Length > settings.FftSize) @@ -84,6 +162,9 @@ public void AddCircular(float[] values) { } [Obsolete("use the Add() method", true)] public void AddScroll(float[] values) { } + /// + /// Load new data into the spectrogram generator + /// public void Add(IEnumerable audio, bool process = true) { newAudio.AddRange(audio); @@ -91,12 +172,26 @@ public void Add(IEnumerable audio, bool process = true) Process(); } + /// + /// The roll offset is used to calculate NextColumnIndex and can be set to a positive number + /// to begin adding new columns to the center of the spectrogram. + /// This can also be used to artificially move the next column index to zero even though some + /// data has already been accumulated. 
+ /// private int rollOffset = 0; + + /// + /// Reset the next column index such that the next processed FFT will appear at the far left of the spectrogram. + /// + /// public void RollReset(int offset = 0) { rollOffset = -FftsProcessed + offset; } + /// + /// Perform FFT analysis on all unprocessed data + /// public double[][] Process() { if (FftsToProcess < 1) @@ -129,6 +224,10 @@ public double[][] Process() return newFfts; } + /// + /// Return a list of the mel-scaled FFTs contained in this spectrogram + /// + /// Total number of output bins to use. Choose a value significantly smaller than Height. public List GetMelFFTs(int melBinCount) { if (settings.FreqMin != 0) @@ -141,15 +240,44 @@ public List GetMelFFTs(int melBinCount) return fftsMel; } + /// + /// Create and return a spectrogram bitmap from the FFTs stored in memory. + /// + /// Multiply the output by a fixed value to change its brightness. + /// If true, output will be log-transformed. + /// If dB scaling is in use, this multiplier will be applied before log transformation. + /// Behavior of the spectrogram when it is full of data. + /// Roll (true) adds new columns on the left overwriting the oldest ones. + /// Scroll (false) slides the whole image to the left and adds new columns to the right. public Bitmap GetBitmap(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false) => Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex); + /// + /// Create a Mel-scaled spectrogram. + /// + /// Total number of output bins to use. Choose a value significantly smaller than Height. + /// Multiply the output by a fixed value to change its brightness. + /// If true, output will be log-transformed. + /// If dB scaling is in use, this multiplier will be applied before log transformation. + /// Behavior of the spectrogram when it is full of data. + /// Roll (true) adds new columns on the left overwriting the oldest ones. 
+ /// Scroll (false) slides the whole image to the left and adds new columns to the right. public Bitmap GetBitmapMel(int melBinCount = 25, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false) => Image.GetBitmap(GetMelFFTs(melBinCount), cmap, intensity, dB, dBScale, roll, NextColumnIndex); [Obsolete("use SaveImage()", true)] public void SaveBitmap(Bitmap bmp, string fileName) { } + /// + /// Generate the spectrogram and save it as an image file. + /// + /// Path of the file to save. + /// Multiply the output by a fixed value to change its brightness. + /// If true, output will be log-transformed. + /// If dB scaling is in use, this multiplier will be applied before log transformation. + /// Behavior of the spectrogram when it is full of data. + /// Roll (true) adds new columns on the left overwriting the oldest ones. + /// Scroll (false) slides the whole image to the left and adds new columns to the right. public void SaveImage(string fileName, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false) { if (ffts.Count == 0) @@ -172,6 +300,15 @@ public void SaveImage(string fileName, double intensity = 1, bool dB = false, do Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex).Save(fileName, fmt); } + /// + /// Create and return a spectrogram bitmap from the FFTs stored in memory. + /// The output will be scaled-down vertically by binning according to a reduction factor and keeping the brightest pixel value in each bin. + /// + /// Multiply the output by a fixed value to change its brightness. + /// If true, output will be log-transformed. + /// If dB scaling is in use, this multiplier will be applied before log transformation. + /// Behavior of the spectrogram when it is full of data. 
+ /// public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false, int reduction = 4) { List ffts2 = new List(); @@ -187,6 +324,9 @@ public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale return Image.GetBitmap(ffts2, cmap, intensity, dB, dBScale, roll, NextColumnIndex); } + /// + /// Export spectrogram data using the Spectrogram File Format (SFF) + /// public void SaveData(string filePath, int melBinCount = 0) { if (!filePath.EndsWith(".sff", StringComparison.OrdinalIgnoreCase)) @@ -194,7 +334,15 @@ public void SaveData(string filePath, int melBinCount = 0) new SFF(this, melBinCount).Save(filePath); } + /// + /// Defines the total number of FFTs (spectrogram columns) to store in memory. Determines Width. + /// private int fixedWidth = 0; + + /// + /// Configure the Spectrogram to maintain a fixed number of pixel columns. + /// Zeros will be added to pad existing data to achieve this width, and extra columns will be deleted. + /// public void SetFixedWidth(int width) { fixedWidth = width; @@ -214,11 +362,21 @@ private void PadOrTrimForFixedWidth() } } + /// + /// Get a vertical image containing ticks and tick labels for the frequency axis. 
+ /// + /// size (pixels) + /// number to add to each tick label + /// length of each tick mark (pixels) + /// bin size for vertical data reduction public Bitmap GetVerticalScale(int width, int offsetHz = 0, int tickSize = 3, int reduction = 1) { return Scale.Vertical(width, settings, offsetHz, tickSize, reduction); } + /// + /// Return the vertical position (pixel units) for the given frequency + /// public int PixelY(double frequency, int reduction = 1) { int pixelsFromZeroHz = (int)(settings.PxPerHz * frequency / reduction); @@ -227,11 +385,18 @@ public int PixelY(double frequency, int reduction = 1) return pixelRow - 1; } + /// + /// Return a list of the FFTs in memory underlying the spectrogram + /// public List GetFFTs() { return ffts; } + /// + /// Return frequency and magnitude of the dominant frequency. + /// + /// If true, only the latest FFT will be assessed. public (double freqHz, double magRms) GetPeak(bool latestFft = true) { if (ffts.Count == 0) From dd9fb572e5a62993cae58a21b3bd74f06ec1fa44 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 20:56:04 -0400 Subject: [PATCH 07/11] Tests: assert ReadWAV() length is accurate Uses Python's scipy.io.wavfile as a source of truth. 
#33 #34 --- dev/python/readwav.py | 16 ++++++++++++++++ src/Spectrogram.Tests/AudioFileTests.cs | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 dev/python/readwav.py create mode 100644 src/Spectrogram.Tests/AudioFileTests.cs diff --git a/dev/python/readwav.py b/dev/python/readwav.py new file mode 100644 index 0000000..c53ba5a --- /dev/null +++ b/dev/python/readwav.py @@ -0,0 +1,16 @@ +""" +sample rate: 44100 +values: 166671 +value 12345: 4435 +""" +from scipy.io import wavfile +import pathlib +PATH_HERE = pathlib.Path(__file__).parent +PATH_DATA = PATH_HERE.joinpath("../../data") + +if __name__ == "__main__": + wavFilePath = PATH_DATA.joinpath("cant-do-that-44100.wav") + samplerate, data = wavfile.read(wavFilePath) + print(f"sample rate: {samplerate}") + print(f"values: {len(data)}") + print(f"value 12345: {data[12345]}") diff --git a/src/Spectrogram.Tests/AudioFileTests.cs b/src/Spectrogram.Tests/AudioFileTests.cs new file mode 100644 index 0000000..f4f4c22 --- /dev/null +++ b/src/Spectrogram.Tests/AudioFileTests.cs @@ -0,0 +1,23 @@ +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Spectrogram.Tests +{ + class AudioFileTests + { + /// + /// Compare values read from the WAV reader against those read by Python's SciPy module (see script in /dev folder) + /// + [Test] + public void Test_AudioFile_KnownValues() + { + (double[] audio, int sampleRate) = AudioFile.ReadWAV("../../../../../data/cant-do-that-44100.wav", multiplier: 32_000); + + Assert.AreEqual(44100, sampleRate); + Assert.AreEqual(166671, audio.Length); + Assert.AreEqual(4435, audio[12345], 1000); + } + } +} From a17a8c09f8af3c1f0361feb8e53e98c12da1bcc9 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 20:56:26 -0400 Subject: [PATCH 08/11] Tests: AudioFile XML docs --- src/Spectrogram.Tests/AudioFile.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/src/Spectrogram.Tests/AudioFile.cs b/src/Spectrogram.Tests/AudioFile.cs index ab6b197..99875c6 100644 --- a/src/Spectrogram.Tests/AudioFile.cs +++ b/src/Spectrogram.Tests/AudioFile.cs @@ -7,11 +7,15 @@ namespace Spectrogram.Tests { public static class AudioFile { + /// + /// Use NAudio to read the contents of a WAV file. + /// public static (double[] audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; - int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8); + int bytesPerSample = afr.WaveFormat.BitsPerSample / 8; + int sampleCount = (int)afr.Length / bytesPerSample; int channelCount = afr.WaveFormat.Channels; var audio = new List(sampleCount); var buffer = new float[sampleRate * channelCount]; @@ -21,6 +25,9 @@ public static (double[] audio, int sampleRate) ReadWAV(string filePath, double m return (audio.ToArray(), sampleRate); } + /// + /// Use MP3Sharp to read the contents of an MP3 file. 
+ /// public static double[] ReadMP3(string filePath, int bufferSize = 4096) { List audio = new List(); From 448282f716083072ecb3b5b6a1fdc853716774a8 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 21:15:46 -0400 Subject: [PATCH 09/11] Tests: AudioFile test all WAV files #33 #34 --- dev/python/readwav.py | 9 ++++----- src/Spectrogram.Tests/AudioFileTests.cs | 16 ++++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dev/python/readwav.py b/dev/python/readwav.py index c53ba5a..7515c8d 100644 --- a/dev/python/readwav.py +++ b/dev/python/readwav.py @@ -9,8 +9,7 @@ PATH_DATA = PATH_HERE.joinpath("../../data") if __name__ == "__main__": - wavFilePath = PATH_DATA.joinpath("cant-do-that-44100.wav") - samplerate, data = wavfile.read(wavFilePath) - print(f"sample rate: {samplerate}") - print(f"values: {len(data)}") - print(f"value 12345: {data[12345]}") + for wavFilePath in PATH_DATA.glob("*.wav"): + wavFilePath = PATH_DATA.joinpath(wavFilePath) + samplerate, data = wavfile.read(wavFilePath) + print(f"{wavFilePath.name}, {samplerate}, {len(data)}") diff --git a/src/Spectrogram.Tests/AudioFileTests.cs b/src/Spectrogram.Tests/AudioFileTests.cs index f4f4c22..60248fc 100644 --- a/src/Spectrogram.Tests/AudioFileTests.cs +++ b/src/Spectrogram.Tests/AudioFileTests.cs @@ -10,14 +10,18 @@ class AudioFileTests /// /// Compare values read from the WAV reader against those read by Python's SciPy module (see script in /dev folder) /// - [Test] - public void Test_AudioFile_KnownValues() + [TestCase("cant-do-that-44100.wav", 44_100, 166_671, 1)] + [TestCase("03-02-03-01-02-01-19.wav", 48_000, 214_615, 1)] + [TestCase("qrss-10min.wav", 6_000, 3_600_000, 1)] + [TestCase("cant-do-that-11025-stereo.wav", 11_025, 41668, 2)] + [TestCase("asehgal-original.wav", 40_000, 1_600_000, 1)] + public void Test_AudioFile_LengthAndSampleRate(string filename, int knownRate, int knownLength, int channels) { - (double[] audio, int sampleRate) = 
AudioFile.ReadWAV("../../../../../data/cant-do-that-44100.wav", multiplier: 32_000); + string filePath = $"../../../../../data/{filename}"; + (double[] audio, int sampleRate) = AudioFile.ReadWAV(filePath); - Assert.AreEqual(44100, sampleRate); - Assert.AreEqual(166671, audio.Length); - Assert.AreEqual(4435, audio[12345], 1000); + Assert.AreEqual(knownRate, sampleRate); + Assert.AreEqual(knownLength, audio.Length / channels); } } } From 817ae4692edfa7929a9b140a3866c3f938d2d861 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 21:27:57 -0400 Subject: [PATCH 10/11] use original readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 63cfd98..83f9206 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ _"I'm sorry Dave... I'm afraid I can't do that"_ * Source code for the WAV reading method is at the bottom of this page. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(double[] audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); sg.SaveImage("hal.png"); @@ -59,7 +59,7 @@ public Form1() Whenever an audio buffer gets filled, add the data to your Spectrogram: ```cs -private void GotNewBuffer(IEnumerable audio) +private void GotNewBuffer(double[] audio) { sg.Add(audio); } @@ -81,7 +81,7 @@ Review the source code of the demo application for additional details and consid This example demonstrates how to convert a MP3 file to a spectrogram image. A sample MP3 audio file in the [data folder](data) contains the audio track from Ken Barker's excellent piano performance of George Frideric Handel's Suite No. 5 in E major for harpsichord ([_The Harmonious Blacksmith_](https://en.wikipedia.org/wiki/The_Harmonious_Blacksmith)). 
This audio file is included [with permission](dev/Handel%20-%20Air%20and%20Variations.txt), and the [original video can be viewed on YouTube](https://www.youtube.com/watch?v=Mza-xqk770k). ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("song.wav"); +(double[] audio, int sampleRate) = ReadWAV("song.wav"); int fftSize = 16384; int targetWidthPx = 3000; @@ -117,7 +117,7 @@ Spectrogram (2993, 817) These examples demonstrate the identical spectrogram analyzed with a variety of different colormaps. Spectrogram colormaps can be changed by calling the `SetColormap()` method: ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(double[] audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 8192, stepSize: 200, maxFreq: 3000); sg.Add(audio); sg.SetColormap(Colormap.Jet); @@ -141,7 +141,7 @@ Cropped Linear Scale (0-3kHz) | Mel Scale (0-22 kHz) Amplitude perception in humans, like frequency perception, is logarithmic. Therefore, Mel spectrograms typically display log-transformed spectral power and are presented using Decibel units. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(double[] audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 500, maxFreq: 3000); sg.Add(audio); @@ -166,7 +166,7 @@ SFF files be saved using `Complex` data format (with real and imaginary values f This example creates a spectrogram but saves it using the SFF file format instead of saving it as an image. The SFF file can then be read in any language. ```cs -(IEnumerable audio, int sampleRate) = ReadWAV("hal.wav"); +(double[] audio, int sampleRate) = ReadWAV("hal.wav"); var sg = new SpectrogramGenerator(sampleRate, fftSize: 4096, stepSize: 700, maxFreq: 2000); sg.Add(audio); sg.SaveData("hal.sff"); @@ -210,17 +210,17 @@ plt.show() You should customize your file-reading method to suit your specific application. 
I frequently use the NAudio package to read data from WAV and MP3 files. This function reads audio data from a mono WAV file and will be used for the examples on this page. ```cs -(IEnumerable audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) +(double[] audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000) { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; - int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample) * 8; + int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8); int channelCount = afr.WaveFormat.Channels; var audio = new List(sampleCount); var buffer = new float[sampleRate * channelCount]; int samplesRead = 0; while ((samplesRead = afr.Read(buffer, 0, buffer.Length)) > 0) audio.AddRange(buffer.Take(samplesRead).Select(x => x * multiplier)); - return (audio, sampleRate); + return (audio.ToArray(), sampleRate); } ``` \ No newline at end of file From 59a466291493fabcfb2714f879dbe8221f1449a1 Mon Sep 17 00:00:00 2001 From: Scott W Harden Date: Sat, 4 Sep 2021 21:29:16 -0400 Subject: [PATCH 11/11] readme: increase verbosity of ReadWAV() --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 83f9206..ccc1c1d 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,8 @@ You should customize your file-reading method to suit your specific application. { using var afr = new NAudio.Wave.AudioFileReader(filePath); int sampleRate = afr.WaveFormat.SampleRate; - int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8); + int bytesPerSample = afr.WaveFormat.BitsPerSample / 8; + int sampleCount = (int)(afr.Length / bytesPerSample); int channelCount = afr.WaveFormat.Channels; var audio = new List(sampleCount); var buffer = new float[sampleRate * channelCount];