{
 "cells": [
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "%matplotlib inline" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [
     "\n",
     "# Add Noise to Speech at Specific SNR Levels\n",
     "\n",
     "\n",
     "Add noise to speech at specific signal-to-noise ratio levels.\n",
     "\n",
     "To see how soundpy implements this, see `soundpy.dsp.add_backgroundsound`.\n"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "Let's import soundpy, and ipd for playing audio data\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "import soundpy as sp\n",
     "import IPython.display as ipd"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "## Define the speech and noise data samples\n", "\n" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "I will use speech and noise data from the soundpy repo.\n", "\n" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "Designate path relevant for accessing audiodata\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "sp_dir = '../../../'" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "Speech sample:\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "speech_sample = '{}audiodata/python.wav'.format(sp_dir)\n",
     "speech_sample = sp.utils.string2pathlib(speech_sample)\n",
     "# as pathlib object, can do the following:\n",
     "word = speech_sample.stem\n",
     "word"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "Noise sample:\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noise_sample = '{}audiodata/background_samples/cafe.wav'.format(sp_dir)\n",
     "noise_sample = sp.utils.string2pathlib(noise_sample)\n",
     "# as pathlib object, can do the following:\n",
     "noise = noise_sample.stem\n",
     "noise"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [
     "### Hear Clean Speech\n",
     "\n",
     "I'm using a higher sample rate here as calculating SNR\n",
     "performs best upwards of 44100 Hz.\n",
     "\n"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sr = 44100\n",
     "s, sr = sp.loadsound(speech_sample, sr = sr)\n",
     "ipd.Audio(s,rate=sr)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### Hear Noise\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "n, sr = sp.loadsound(noise_sample, sr = sr)\n",
     "ipd.Audio(n,rate=sr)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### Hear Signal-to-Noise Ratio 20\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noisyspeech_20snr, snr20 = sp.dsp.add_backgroundsound(\n",
     "    speech_sample,\n",
     "    noise_sample,\n",
     "    sr = sr,\n",
     "    snr = 20)\n",
     "ipd.Audio(noisyspeech_20snr,rate=sr)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [
     "`snr20` is simply the measured SNR post adjustment of the noise signal.\n",
     "This is useful to check that the indicated snr is at least close\n",
     "to the resulting snr.\n",
     "\n"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "snr20" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### Hear Signal-to-Noise Ratio 5\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noisyspeech_5snr, snr5 = sp.dsp.add_backgroundsound(\n",
     "    speech_sample,\n",
     "    noise_sample,\n",
     "    sr = sr,\n",
     "    snr = 5)\n",
     "ipd.Audio(noisyspeech_5snr,rate=sr)"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "snr5" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "## Visualize the Audio Samples\n", "\n" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Clean Speech (raw signal)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(speech_sample, feature_type='signal',\n",
     "    sr = sr, title = 'Speech: ' + word.upper(),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Clean Speech (stft)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(speech_sample, feature_type='stft',\n",
     "    sr = sr, title = 'Speech: ' + word.upper(),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noise (raw signal)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noise_sample, feature_type='signal', sr = sr,\n",
     "    title = 'Noise: ' + noise.upper(), subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noise (stft)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noise_sample, feature_type='stft', sr = sr,\n",
     "    title = 'Noise: ' + noise.upper(), subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noisy Speech: SNR 20 (raw signal)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'signal',\n",
     "    title = '\"{}\" with {} noise at SNR 20'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noisy Speech: SNR 20 (stft)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_20snr, sr = sr, feature_type = 'stft',\n",
     "    title = '\"{}\" with {} noise at SNR 20'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noisy Speech: SNR 5 (raw signal)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_5snr, sr = sr, feature_type = 'signal',\n",
     "    title = '\"{}\" with {} noise at SNR 5'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### See Noisy Speech: SNR 5 (stft)\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_5snr, sr = sr, feature_type = 'stft',\n",
     "    title = '\"{}\" with {} noise at SNR 5'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "## Make Combined Sound Longer\n", "\n" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### Pad Speech and Set Total Length\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noisyspeech_padded, snr_padded = sp.dsp.add_backgroundsound(\n",
     "    speech_sample,\n",
     "    noise_sample,\n",
     "    sr = sr,\n",
     "    snr = 20,\n",
     "    pad_mainsound_sec = 1,\n",
     "    total_len_sec = 4)"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "ipd.Audio(noisyspeech_padded,rate=sr)" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_padded, sr = sr, feature_type = 'signal',\n",
     "    title = '\"{}\" with {} noise at SNR 20'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "## Make Combined Sound Shorter\n", "\n" ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "### Set Total Length\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noisyspeech_short, snr_short = sp.dsp.add_backgroundsound(\n",
     "    speech_sample,\n",
     "    noise_sample,\n",
     "    sr = sr,\n",
     "    snr = 20,\n",
     "    total_len_sec = 0.75)"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "ipd.Audio(noisyspeech_short,rate=sr)" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_short, sr = sr, feature_type = 'signal',\n",
     "    title = '\"{}\" with {} noise at SNR 20'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] },
  { "cell_type": "markdown", "metadata": {},
    "source": [ "## Wrap the Background Sound\n", "\n" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "noisyspeech_wrapped, snr_wrapped = sp.dsp.add_backgroundsound(\n",
     "    speech_sample,\n",
     "    noise_sample,\n",
     "    sr = sr,\n",
     "    snr = 20,\n",
     "    wrap = True,\n",
     "    pad_mainsound_sec = 2,\n",
     "    total_len_sec = 5)"
    ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [ "ipd.Audio(noisyspeech_wrapped,rate=sr)" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [],
    "source": [
     "sp.plotsound(noisyspeech_wrapped, sr = sr, feature_type = 'signal',\n",
     "    title = '\"{}\" with {} noise at SNR 20'.format(word.upper(), noise.upper()),\n",
     "    subprocess=True)"
    ] }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": {
   "codemirror_mode": { "name": "ipython", "version": 3 },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}