'''The augment module includes functions related to augmenting audio data.
These functions are based on augmentation techniques described in the research
literature.

Other resources for augmentation (not included in soundpy functionality):

Ma, E. (2019). NLP Augmentation. https://github.com/makcedward/nlpaug

Park, D. S., Chan, W., Zhang, Y., Chiu, C., Zoph, B., Cubuk, E. D., & Le, Q. V.
(2019). SpecAugment: A simple data augmentation method for automatic speech
recognition. Google Brain. arxiv.org/pdf/1904.08779.pdf

Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for
improving animal audio classification. Ecological Informatics, 57, 101084.
https://doi.org/10.1016/j.ecoinf.2020.101084, which applies the following
augmentations (a rough usage sketch follows the list):

1. Signal speed scaling by a random number in [0.8, 1.2] (SpeedupFactoryRange).
2. Pitch shift by a random number in [-2, 2] semitones (SemitoneShiftRange).
3. Volume increase/decrease by a random number in [-3, 3] dB (VolumeGainRange).
4. Addition of random noise in the range [0, 10] dB (SNR).
5. Time shift in the range [-0.005, 0.005] seconds (TimeShiftRange).
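
As a rough illustration, item 1 (random speed scaling in [0.8, 1.2]) could be
approximated with this module's functions. This is only a sketch; the tone and
parameter values below are illustrative, not taken from the cited papers:

>>> import numpy as np
>>> import soundpy as sp
>>> sr = 16000
>>> samples = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # synthetic 1-second tone
>>> perc = np.random.uniform(0, 0.2)  # speed change of up to 20 percent
>>> faster = sp.augment.speed_increase(samples, sr, perc=perc)  # rate = 1 + perc
>>> slower = sp.augment.speed_decrease(samples, sr, perc=perc)  # rate = 1 - perc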
'''
###############################################################################
import os, sys
import inspect
currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
packagedir = os.path.dirname(currentdir)
sys.path.insert(0, packagedir)
import numpy as np
import math
import librosa
import pathlib
import soundpy as sp
def speed_increase(sound, sr, perc=0.15, **kwargs):
    '''Acoustic augmentation of speech: speeds up the signal using a time-stretch rate of 1 + `perc`.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
    
    Ko, T., Peddinti, V., Povey, D., & Khudanpur, S. (2015). Audio Augmentation for 
    Speech Recognition. Interspeech. 
    
    Verhelst, W., & Roelands, M. (1993). An overlap-add technique based on 
    waveform similarity (WSOLA) for high quality time-scale modification of 
    speech. Proceedings of the International Conference on Acoustics, Speech 
    and Signal Processing (ICASSP), vol. 2, pp. 554-557.
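    
    Examples
    --------
    A minimal sketch with a synthetic tone; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 second, 440 Hz
    >>> faster = sp.augment.speed_increase(tone, sr, perc=0.15)
    >>> len(faster) < len(tone)  # sped-up audio has fewer samples
    True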
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    # if entered 50 instead of .50, turns 50 into .50
    while perc > 1:
        perc *= .01
    rate = 1. + perc
    y_fast = librosa.effects.time_stretch(data, rate=rate)
    return y_fast 
def speed_decrease(sound, sr, perc=0.15, **kwargs):
    '''Acoustic augmentation of speech: slows down the signal using a time-stretch rate of 1 - `perc`.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
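    
    Examples
    --------
    A minimal sketch with a synthetic tone; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 second, 440 Hz
    >>> slower = sp.augment.speed_decrease(tone, sr, perc=0.15)
    >>> len(slower) > len(tone)  # slowed-down audio has more samples
    True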
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    # if entered 50 instead of .50, turns 50 into .50
    while perc > 1:
        perc *= .01
    rate = 1. - perc
    y_slow = librosa.effects.time_stretch(data, rate=rate)
    return y_slow 
def time_shift(sound, sr, random_seed = None, **kwargs):
    '''Acoustic augmentation of sound (likely not suitable for speech).
    
    Applies a random time shift by dividing the sound into two sections and 
    switching their order.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
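    
    Examples
    --------
    A minimal sketch using an integer ramp so the section swap is easy to see; 
    the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> data = np.arange(8, dtype=np.float64)
    >>> shifted = sp.augment.time_shift(data, sr=8000, random_seed=40)
    >>> sorted(shifted) == sorted(data)  # same samples, possibly reordered
    True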
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    switched = sp.augment.shufflesound(data, sr=sr, 
                                          num_subsections = 2, 
                                          random_seed = random_seed)
    return switched 
    
def shufflesound(sound, sr, num_subsections = 2, random_seed = None, **kwargs):
    '''Acoustic augmentation of noise or background sounds.
    
    This separates the sound into `num_subsections` and pseudorandomizes
    the order.
    
    References
    ----------
    Inoue, T., Vinayavekhin, P., Wang, S., Wood, D., Munawar, A., Ko, B. J.,
    Greco, N., & Tachibana, R. (2019). Shuffling and mixing data augmentation 
    for environmental sound classification. Detection and Classification of 
    Acoustic Scenes and Events 2019. 25-26 October 2019, New York, NY, USA
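    
    Examples
    --------
    A minimal sketch using an integer ramp; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> data = np.arange(9, dtype=np.float64)
    >>> shuffled = sp.augment.shufflesound(data, sr=9000, num_subsections=3, random_seed=40)
    >>> len(shuffled) == len(data)  # all samples kept, subsections reordered
    True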
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    subsection_length = len(data) // num_subsections
    order = np.arange(num_subsections)
    if random_seed is not None:
        np.random.seed(random_seed)
    np.random.shuffle(order)
    section_dict = {}
    sample = 0
    for i in range(num_subsections):
        if i == num_subsections-1:
            section = data[sample:]
        else:
            section = data[sample:sample+subsection_length]
        section_dict[i] = section
        sample += subsection_length
    # combine samples in new order:
    samples_shuffled = np.array([])
    for i in order:
        samples_shuffled = np.concatenate((samples_shuffled, section_dict[i]),axis=0)
    return samples_shuffled 
def add_white_noise(sound, sr, noise_level=0.01, snr=10, random_seed=None, **kwargs):
    '''Adds generated white noise to the signal at a specified signal-to-noise ratio.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
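    
    Examples
    --------
    A minimal sketch with a synthetic tone; passing a list for `snr` picks one 
    of the values at random. The values here are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 second, 440 Hz
    >>> noisy = sp.augment.add_white_noise(tone, sr, snr=10, random_seed=40)
    >>> noisy2 = sp.augment.add_white_noise(tone, sr, snr=[5, 10, 20], random_seed=40)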
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr)
        assert sr2 == sr
    n = sp.dsp.generate_noise(num_samples = len(data), 
                                amplitude=noise_level, 
                                random_seed=random_seed)
    if isinstance(snr, list):
        snr = np.random.choice(snr)
    sound_n, snr = sp.dsp.add_backgroundsound(data, n, sr = sr, snr=snr, **kwargs)
    return sound_n 
def harmonic_distortion(sound, sr, **kwargs):
    '''Applies harmonic distortion by passing the signal through a sine function five times.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
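    
    Examples
    --------
    A minimal sketch with a synthetic tone; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)
    >>> distorted = sp.augment.harmonic_distortion(tone, sr)
    >>> distorted.shape == tone.shape  # shape is unchanged
    True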
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    data = 2*np.pi*data
    # pass the signal through the sine function five times
    for _ in range(5):
        data = np.sin(data)
    return data 
    
def pitch_increase(sound, sr, num_semitones = 2, **kwargs):
    '''Raises the pitch of the signal by `num_semitones` semitones.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
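    
    Examples
    --------
    A minimal sketch with a synthetic tone; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)
    >>> higher = sp.augment.pitch_increase(tone, sr, num_semitones=2)
    >>> len(higher) == len(tone)  # pitch shifting preserves length
    True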
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    y_i = librosa.effects.pitch_shift(data, sr=sr, n_steps = num_semitones)
    return y_i 
def pitch_decrease(sound, sr, num_semitones = 2, **kwargs):
    '''Lowers the pitch of the signal by `num_semitones` semitones.
    
    References
    ----------
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
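    
    Examples
    --------
    A minimal sketch with a synthetic tone; the values are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)
    >>> lower = sp.augment.pitch_decrease(tone, sr, num_semitones=2)
    >>> len(lower) == len(tone)  # pitch shifting preserves length
    True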
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr, **kwargs)
        assert sr2 == sr
    y_d = librosa.effects.pitch_shift(data, sr=sr, n_steps = -num_semitones)
    return y_d 
      
# TODO how to control output size without losing frequency data?
# basically how to scale down dimension of frequencies after warping?
# https://docs.scipy.org/doc/scipy/reference/tutorial/ndimage.html#interpolation-functions
# scikit-image resize (only powerspectrum)
# https://stackoverflow.com/questions/23918036/interpolate-whole-arrays-of-complex-numbers
def vtlp(sound, sr, a = (0.8,1.2), random_seed = None,
         oversize_factor = 16, win_size_ms = 50, percent_overlap = 0.5,
         bilinear_warp = True, real_signal = True, fft_bins = 1024, window = 'hann',
         zeropad = True, expected_shape = None, visualize = False):
    '''Applies vocal tract length perturbation (VTLP) directly to oversized DFT windows.
    
    References
    ----------
    Kim, C., Shin, M., Garg, A., & Gowda, D. (2019). Improved vocal tract length perturbation 
    for a state-of-the-art end-to-end speech recognition system. Interspeech. September 15-19, 
    Graz, Austria.
    
    Nanni, L., Maguolo, G., & Paci, M. (2020). Data augmentation approaches for 
    improving animal audio classification. Ecological Informatics, 57, 101084. 
    https://doi.org/10.1016/j.ecoinf.2020.101084
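    
    Examples
    --------
    A minimal sketch with a synthetic tone; the warp factor is drawn at random 
    from the range `a`. The values here are illustrative only:
    
    >>> import numpy as np
    >>> import soundpy as sp
    >>> sr = 16000
    >>> tone = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)
    >>> stft_warped, warp_factor = sp.augment.vtlp(tone, sr, a=(0.8, 1.2), random_seed=40)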
    '''
    if isinstance(sound, np.ndarray):
        data = sound
    else:
        data, sr2 = sp.loadsound(sound, sr=sr)
        assert sr2 == sr
    if random_seed is not None:
        np.random.seed(random_seed)
    if isinstance(a, tuple) or isinstance(a, list):
        vtlp_a = np.random.choice(np.arange(min(a), max(a)+.1, 0.1)  )
    elif isinstance(a, int) or isinstance(a, float):
        vtlp_a = a
    else:
        vtlp_a = None
    if not isinstance(vtlp_a, (int, float, np.int_, np.float_)):
        raise TypeError('Function `soundpy.augment.vtlp` expected a to be an int or float, or'+\
            ' a list / tuple of ints, or floats; not of type {}'.format(type(a)))
    frame_length = sp.dsp.calc_frame_length(win_size_ms, sr)
    num_overlap_samples = int(frame_length * percent_overlap)
    num_subframes = sp.dsp.calc_num_subframes(len(data),
                                                frame_length = frame_length,
                                                overlap_samples = num_overlap_samples,
                                                zeropad = zeropad)
    max_freq = sr/2.
    if expected_shape is not None:
        # expects last column to represent the number of relevant frequency bins
        #fft_bins = expected_shape[-1]
        fft_bins = (expected_shape[-1]-1) * 2 
    if fft_bins is None:
        fft_bins = int(win_size_ms * sr // 1000)
    total_rows = fft_bins * oversize_factor
    # initialize empty matrix to fill dft values into
    stft_matrix = sp.dsp.create_empty_matrix(
        (num_subframes,total_rows), complex_vals = True)
    
    section_start = 0
    window_frame = sp.dsp.create_window(window, frame_length)
    for frame in range(num_subframes):
        section = data[section_start:section_start+frame_length]
        section = sp.dsp.apply_window(section, window_frame, zeropad = zeropad)
        # apply dft to large window - increase frequency resolution during warping
        section_fft = sp.dsp.calc_fft(section, 
                                        real_signal = real_signal,
                                        fft_bins = total_rows,
                                        )
        if bilinear_warp:
            section_warped = sp.dsp.bilinear_warp(section_fft, vtlp_a)
        else:
            section_warped = sp.dsp.piecewise_linear_warp(section_fft, vtlp_a,
                                                                max_freq = max_freq)
        if real_signal:
            section_warped = section_warped[:len(section_warped)]
        else:
            section_warped = section_warped[:len(section_warped)//2 + 1]
        stft_matrix[frame][:len(section_warped)] = section_warped
        section_start += (frame_length - num_overlap_samples)
    if expected_shape is not None:
        stft_matrix = stft_matrix[:,:len(section_warped)]
        # TODO: find out how to reduce resolution of frequency
        # this technically works but is 1) slow and 2) loses lots of info
        if oversize_factor > 1:
            import skimage
            from skimage.transform import resize
            power_matrix = sp.dsp.calc_power(stft_matrix)
            stft_matrix = resize(power_matrix, expected_shape)
            import warnings
            msg = '\nWARNING: Only the power spectrum of the VTLP augmented signal'+\
                ' can be returned due to resizing the augmentation from '+\
                    '{} to {}'.format(power_matrix.shape, expected_shape)
            warnings.warn(msg)
            #for i in np.arange(0, int(np.sqrt(oversize_factor))):
                #stft_matrix = sp.feats.reduce_dim(stft_matrix, axis=1)
        # ensures matches expected_shape
        stft_matrix = sp.feats.adjust_shape(stft_matrix, expected_shape)
    else:
        stft_matrix = stft_matrix[:,:len(section_warped)]
    if visualize:
        sp.feats.plot(stft_matrix, feature_type = 'stft', subprocess=True, 
                    name4pic = 'vtlp_{}.png'.format(sp.utils.get_date()),
                    title = 'size: {}'.format(stft_matrix.shape),
                    save_pic=True)
    return stft_matrix, vtlp_a 
def get_augmentation_dict():
    '''Returns dictionary with augmentation options as keys and values set to False.
    
    Examples
    --------
    >>> import soundpy as sp
    >>> ad = sp.augment.get_augmentation_dict()
    >>> ad
    {'speed_increase': False,
    'speed_decrease': False,
    'time_shift': False,
    'shufflesound': False,
    'add_white_noise': False,
    'harmonic_distortion': False,
    'pitch_increase': False,
    'pitch_decrease': False,
    'vtlp': False}
    >>> # to set augmentation to True:
    >>> ad['add_white_noise'] = True
    >>> ad
    {'speed_increase': False,
    'speed_decrease': False,
    'time_shift': False,
    'shufflesound': False,
    'add_white_noise': True,
    'harmonic_distortion': False,
    'pitch_increase': False,
    'pitch_decrease': False,
    'vtlp': False}
    '''
    base_dict = dict([('speed_increase', False),
                      ('speed_decrease', False),
                      ('time_shift', False),
                      ('shufflesound', False),
                      ('add_white_noise', False),
                      ('harmonic_distortion', False),
                      ('pitch_increase', False),
                      ('pitch_decrease', False),
                      ('vtlp', False),
                      ])
    return base_dict 
def list_augmentations():
    '''Lists available augmentations.
    
    Examples
    --------
    >>> import soundpy as sp
    >>> print(sp.augment.list_augmentations())
    Available augmentations:
            speed_increase
            speed_decrease
            time_shift
            shufflesound
            add_white_noise
            harmonic_distortion
            pitch_increase
            pitch_decrease
            vtlp
    '''
    augmentation_dict = sp.augment.get_augmentation_dict()
    aug_list = '\t'+'\n\t'.join(str(x) for x in augmentation_dict.keys())
    augmentations = 'Available augmentations:\n '+ aug_list
    return augmentations 
    
# TODO test to see if list can be applied to all augmentations, not just 'add_white_noise'
def get_augmentation_settings_dict(augmentation):
    '''Returns default settings of base function for augmentation.
    
    Parameters
    ----------
    augmentation : str 
        The augmentation of interest.
    
    Returns
    -------
    aug_defaults : dict 
        A dictionary with the base augmentation function parameters as keys  
        and default values as values. 
    
    Examples
    --------
    >>> import soundpy as sp
    >>> d = sp.augment.get_augmentation_settings_dict('speed_decrease')
    >>> d
    {'perc': 0.15}
    >>> # can use this dictionary to apply different values for augmentation
    >>> d['perc'] = 0.1
    >>> d
    {'perc': 0.1}
    >>> # to build a dictionary with several settings:
    >>> many_settings_dict = {}
    >>> many_settings_dict['add_white_noise'] = sp.augment.get_augmentation_settings_dict('add_white_noise')
    >>> many_settings_dict['pitch_increase'] = sp.augment.get_augmentation_settings_dict('pitch_increase')
    >>> many_settings_dict
    {'add_white_noise': {'noise_level': 0.01, 'snr': 10, 'random_seed': None},
    'pitch_increase': {'num_semitones': 2}}
    >>> # change 'snr' default values to list of several values
    >>> # this would apply white noise at either 10, 15, or 20 SNR, at random
    >>> many_settings_dict['add_white_noise']['snr'] = [10, 15, 20]
    >>> # change number of semitones pitch increase is applied
    >>> many_settings_dict['pitch_increase']['num_semitones'] = 1
    >>> many_settings_dict
    {'add_white_noise': {'noise_level': 0.01,
    'snr': [10, 15, 20],
    'random_seed': None},
    'pitch_increase': {'num_semitones': 1}}
    
    Raises
    ------
    ValueError 
        If `augmentation` does not match available augmentations.
    
    See Also
    --------
    soundpy.models.dataprep.augment_features
        The above dictionary example `many_settings_dict` can be applied under the
        parameter `augment_settings_dict` to apply augmentation settings when 
        augmenting data, for example, within a generator function. See `soundpy.models.dataprep.GeneratorFeatExtraction`.
    '''
    if augmentation == 'speed_increase':
        aug_defaults = sp.utils.get_default_args(sp.augment.speed_increase)
    elif augmentation == 'speed_decrease':
        aug_defaults = sp.utils.get_default_args(sp.augment.speed_decrease)        
    elif augmentation == 'time_shift':
        aug_defaults = sp.utils.get_default_args(sp.augment.time_shift)
    elif augmentation == 'shufflesound':
        aug_defaults = sp.utils.get_default_args(sp.augment.shufflesound)
    elif augmentation == 'add_white_noise':
        aug_defaults = sp.utils.get_default_args(sp.augment.add_white_noise)
    elif augmentation == 'harmonic_distortion':
        aug_defaults = sp.utils.get_default_args(sp.augment.harmonic_distortion)
    elif augmentation == 'pitch_increase':
        aug_defaults = sp.utils.get_default_args(sp.augment.pitch_increase)
    elif augmentation == 'pitch_decrease':
        aug_defaults = sp.utils.get_default_args(sp.augment.pitch_decrease)
    elif augmentation == 'vtlp':
        aug_defaults = sp.utils.get_default_args(sp.augment.vtlp)
    else:
        raise ValueError('Received `augmentation` "{}"'.format(augmentation)+\
            ' which is not included in available augmentations:\n{}'.format(
                sp.augment.list_augmentations()))
    return aug_defaults