{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Train a Denoising Autoencoder\n\n\nTrain a denoising autoencoder with clean and noisy acoustic features.\n\nTo see how soundpy implements this, see `soundpy.models.builtin.denoiser_train`, \n`soundpy.builtin.denoiser_feats` and `soundpy.builtin.create_denoise_data`.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Environment setup: extend sys.path and run the notebook from `package_dir`.\nimport os, sys\nimport inspect\n# Put the directory three levels above the folder containing this frame's file\n# first on sys.path.\n# NOTE(review): in a notebook the frame's file may not be the notebook itself,\n# so `packagedir` may not be the repository root -- confirm.\ncurrentdir = os.path.dirname(os.path.abspath(\n    inspect.getfile(inspect.currentframe())))\nparentdir = os.path.dirname(currentdir)\nparparentdir = os.path.dirname(parentdir)\npackagedir = os.path.dirname(parparentdir)\nsys.path.insert(0, packagedir)\n\nimport matplotlib.pyplot as plt\nimport IPython.display as ipd\npackage_dir = '../../../'\n# `package_dir` is relative, so a plain os.chdir(package_dir) would climb three\n# more levels every time this cell is re-run. Remember the directory the kernel\n# started in and always resolve against it, which keeps the cell idempotent.\n_start_dir = globals().get('_start_dir', os.getcwd())\nos.chdir(os.path.join(_start_dir, package_dir))\n# Prefix that later cells use to build data paths.\nsp_dir = package_dir"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Let's import soundpy for handling sound\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import soundpy as sp"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "As well as the deep learning component of soundpy\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from soundpy import models as spdl"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Prepare for Training: Data Organization\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Designate the path for accessing the audio data.\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "I will load previously extracted features (sample data); see `soundpy.feats.save_features_datasets` or `soundpy.builtin.denoiser_feats`.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Folder with the pre-extracted sample features, built from the `sp_dir` prefix.\nfeature_extraction_dir = (\n    '{}audiodata2/example_feats_models/'.format(sp_dir)\n    + 'denoiser/example_feats_fbank/'\n)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "What is in this folder?\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Pass the folder through sp.utils.check_dir, then print the name of every\n# entry matching the '*.*' pattern.\nfeature_extraction_dir = sp.utils.check_dir(feature_extraction_dir)\nfiles = [entry for entry in feature_extraction_dir.glob('*.*')]\nfor entry in files:\n    print(entry.name)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The .npy files contain the features themselves, in train, validation, and\ntest datasets:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Restrict the listing to the .npy files.\nfiles = [entry for entry in feature_extraction_dir.glob('*.npy')]\nfor entry in files:\n    print(entry.name)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The .csv files contain information about how the features were extracted\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Restrict the listing to the .csv files.\nfiles = [entry for entry in feature_extraction_dir.glob('*.csv')]\nfor entry in files:\n    print(entry.name)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We'll have a look at which features were extracted and other settings:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Load the extraction-settings log and print each entry as 'key  -->  value'.\nsettings_path = feature_extraction_dir.joinpath('log_extraction_settings.csv')\nfeat_settings = sp.utils.load_dict(settings_path)\nfor setting_name, setting_value in feat_settings.items():\n    print(setting_name, ' --> ', setting_value)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For more about these settings, see `soundpy.feats.save_features_datasets`.\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "We'll have a look at the audio files that were assigned \nto the train, val, and test datasets.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Load the clean-audio dataset assignments and preview the first six entries.\ndatasets_path = feature_extraction_dir.joinpath('audiofiles_datasets_clean.csv')\naudio_datasets = sp.utils.load_dict(datasets_path)\nfor position, (key, value) in enumerate(audio_datasets.items()):\n    print(key, ' --> ', value)\n    # Stop once the sixth entry (index 5) has been printed.\n    if position >= 5:\n        break"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Built-In Functionality: soundpy does everything for you\n\nFor more about this, see `soundpy.models.builtin.denoiser_train`.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Train with the built-in routine on the loaded feature folder for 10 epochs.\n# It returns the model directory and a history object (plotted further down).\ntrain_kwargs = dict(feature_extraction_dir=feature_extraction_dir, epochs=10)\nmodel_dir, history = spdl.denoiser_train(**train_kwargs)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Where the model and logs are located:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Show the directory returned by spdl.denoiser_train as the cell output.\nmodel_dir"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Let's plot how the model performed (on this mini dataset)\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Plot training vs. validation loss per epoch and save the figure as\n# 'loss.png' in the current working directory.\n# `plt` is already imported in the setup cell, so it is not re-imported here.\nfig, ax = plt.subplots()\nax.plot(history.history['loss'], label='train')\nax.plot(history.history['val_loss'], label='val')\nax.set(title='model loss', xlabel='epoch', ylabel='loss')\nax.legend(loc='upper right')\nfig.savefig('loss.png')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}