'''The files module contains functions related to handling audio data files, for example loading audio files, saving audio files, and examining and reformatting audio files.
'''
import numpy as np
import random
import collections
import math
import pathlib
from scipy.io.wavfile import write, read
from scipy.signal import resample
import soundfile as sf
import librosa
import os, sys, tarfile
import inspect
# Locate the directory holding this file, take its parent directory, and put
# that parent at the front of sys.path.
# NOTE(review): presumably this lets the `import soundpy as sp` below resolve
# to this checkout rather than an installed copy - confirm.
currentdir = os.path.dirname(os.path.abspath(
inspect.getfile(inspect.currentframe())))
packagedir = os.path.dirname(currentdir)
sys.path.insert(0, packagedir)
import soundpy as sp
def loadsound(filename, sr = None, mono = True, dur_sec = None,
              remove_dc = True, use_scipy = False):
    '''Loads sound file with scipy.io.wavfile.read or librosa.load (default librosa)

    Parameters
    ----------
    filename : str
        The filename of the sound to be loaded
    sr : int, optional
        The desired sample rate of the audio samples. If None,
        the sample rate of the audio file will be used.
    mono : bool
        If True, the samples will be loaded in mono sound. If False,
        if the samples are in stereo, they will be loaded in stereo sound.
    dur_sec : int, float, optional
        The length in seconds of the audio signal.
    remove_dc : bool
        If True, the mean is subtracted from the signal. This has shown to be
        very helpful when working with audio data. (default True)
    use_scipy : bool
        If False, librosa will be used to load the audiofile. If True,
        scipy.io.wavfile and/or soundfile will be used. If the sound file
        is not compatible with scipy.io.wavfile.read, this function converts
        the file to .wav format and/or changes the bit depth to be compatible.
        (default False)

    Returns
    -------
    data : nd.array [size=(num_samples,) or (num_samples, num_channels)]
        The normalized (between 1 and -1) sample data returned
        according to the specified settings.
    sr : int
        The sample rate of the loaded samples.

    Raises
    ------
    RuntimeError
        If `use_scipy` is True and the file could not be converted to a
        format scipy.io.wavfile can read.

    See Also
    --------
    soundpy.files.prep4scipywavfile
        Prepares audio file for scipy.io.wavfile.read.
    soundpy.files.convert_audiofile
        Converts audio file to .wav format.
    soundpy.files.newbitdepth
        Converts audio file to specified bitdepth.
    soundpy.dsp.resample_audio
        Resample audio data to a specified sample rate.
    soundpy.files.list_possibleformats
        Lists the possible formats to load with soundpy.loadsound
    librosa.load
        The package used to load sound data by default. See `librosa`.
    scipy.io.wavfile.read
        The package used to load sound if `use_scipy` is set to True.
        See `scipy`.
    soundpy.dsp.remove_dc_bias
        Removes the 'direct current' bias from the signal.

    Todo
    ----
    Make librosa data and scipy.io.wavfile data more similar
    https://stackoverflow.com/questions/54482346/reading-a-wav-file-with-scipy-and-librosa-in-python
    '''
    import warnings
    stereo_msg = '\nWARNING: Most functionality has not been tested '+\
        'with stereo sound. Many functions may fail or not work as '+\
        'expected. Apologies for the inconvenience!'
    if not use_scipy:
        # the sample data will be a little different from scipy.io.wavfile
        # as librosa does a little extra work with the data
        data, sr = librosa.load(filename, sr=sr, mono=mono, duration=dur_sec)
        if mono is False and len(data.shape) > 1:
            if data.shape[0] < data.shape[1]:
                warnings.warn(stereo_msg)
                # change shape from (channels, samples) to (samples, channels)
                data = data.T
        if remove_dc:
            data = sp.dsp.remove_dc_bias(data)
        return data, sr
    try:
        sr2, data = read(filename)
        if sr:
            if sr2 != sr:
                data, sr2 = sp.dsp.resample_audio(data,
                                                  sr_original = sr2,
                                                  sr_desired = sr)
                assert sr2 == sr
        else:
            sr = sr2
    except ValueError:
        # scipy could not read the file: convert it to .wav and, if that is
        # still not enough, change its bit depth.
        print("Converting {} to wavfile".format(filename))
        try:
            filename = sp.files.convert_audiofile(filename, overwrite=False)
        except RuntimeError as e:
            # keep the original failure attached for debugging
            raise RuntimeError(
                'Try setting `use_scipy` to False in soundpy.loadsound().') from e
        try:
            # `remove_dc` is forwarded so that remove_dc=False is honoured
            # by the fallback load as well.
            data, sr = loadsound(filename, sr=sr, mono=mono, dur_sec=dur_sec,
                                 remove_dc=remove_dc)
            print("File saved as {}".format(filename))
        except ValueError:
            print("Ensure bitdepth is compatible with scipy library")
            filename = sp.files.newbitdepth(filename, overwrite=False)
            data, sr = loadsound(filename, sr=sr, mono=mono, dur_sec=dur_sec,
                                 remove_dc=remove_dc)
    # scipy loads data in shape (num_samples, num_channels)
    # don't need to transpose as for librosa
    if mono and len(data.shape) > 1:
        if data.shape[1] > 1:
            data = sp.dsp.stereo2mono(data)
    if not mono and len(data.shape) > 1:
        warnings.warn(stereo_msg)
    # scale samples to be between -1 and 1
    data = sp.dsp.scalesound(data, max_val= 1, min_val=-1)
    if dur_sec:
        numsamps = int(dur_sec * sr)
        data = sp.dsp.set_signal_length(data, numsamps)
    if remove_dc:
        data = sp.dsp.remove_dc_bias(data)
    return data, sr
def savesound(audiofile_name, signal_values, sr, remove_dc=True,
              overwrite=False, use_scipy=False, **kwargs):
    """Saves the audio samples at the designated path.

    Parameters
    ----------
    audiofile_name : str or pathlib.PosixPath
        path and name the audio is to be saved under. (.wav format)
    signal_values : ndarray
        values of real signal to be saved
    sr : int
        sample rate of the audio samples.
    remove_dc : bool
        If True, the mean is subtracted from the signal. (default True)
    overwrite : bool
        If True, audio with the same name will be overwritten. (default False)
    use_scipy : bool
        If True, scipy.io.wavfile.write will be used. However, file conversion is
        limited. Can only save .wav files. Otherwise soundfile.write will be used,
        which can save audio under more audio formats.
    **kwargs : additional keyword arguments
        The keyword arguments for soundfile.write:
        https://pysoundfile.readthedocs.io/en/latest/index.html?highlight=write#soundfile.write

    Returns
    -------
    audiofile_name : pathlib.PosixPath
        The new audiofile name. Its extension is changed to match the
        `format` keyword argument when the two disagree.

    Raises
    ------
    FileExistsError
        If the file to be written already exists and `overwrite` is False.

    See Also
    --------
    scipy.io.wavfile.write
    soundpy.files.conversion_formats
        Lists the possible formats to save audio files if `use_scipy` is False.
    soundpy.dsp.remove_dc_bias
        Removes the 'direct current' bias from the signal.
    """
    exists_msg = 'Filename {} already exists.'+\
        '\nSet `overwrite` to True in function savesound() to overwrite.'
    audiofile_name = sp.utils.string2pathlib(audiofile_name)
    if os.path.exists(audiofile_name) and overwrite is False:
        raise FileExistsError(exists_msg.format(audiofile_name))
    if not use_scipy and kwargs.get('format'):
        # The extension must agree with the requested format. The comparison
        # ignores case, so 'audio.WAV' is accepted for format 'WAV'.
        desired_ext = kwargs['format'].lower()
        if desired_ext != audiofile_name.suffix[1:].lower():
            audiofile_name_orig = audiofile_name
            audiofile_name = replace_ext(audiofile_name, desired_ext)
            import warnings
            message = 'The desired format does not match the new file name: '+\
                '\nDesired format: {}'.format(kwargs['format'])+\
                '\nFilename: {}'.format(audiofile_name_orig) +\
                '\nExtension will be adjusted to the format setting: '+\
                '\n{}'.format(audiofile_name)
            warnings.warn(message)
            # The name changed, so the overwrite guard has to be repeated
            # for the file that will actually be written.
            if os.path.exists(audiofile_name) and overwrite is False:
                raise FileExistsError(exists_msg.format(audiofile_name))
    directory = audiofile_name.parent
    directory = sp.utils.check_dir(directory, make=True)
    if remove_dc:
        signal_values = sp.dsp.remove_dc_bias(signal_values)
    if use_scipy:
        write(audiofile_name, sr, signal_values)
    else:
        sf.write(audiofile_name, signal_values, sr, **kwargs)
    return audiofile_name
def audiofiles_present(directory, recursive=False):
    '''Checks to see if audio files are present.

    A file counts as audio when its extension is listed by
    soundpy.files.list_possibleformats, the same test that
    ensure_only_audiofiles applies.

    Parameters
    ----------
    directory : str or pathlib.PosixPath
        The directory to look for audio.
    recursive : bool
        If True, all nested directories will be checked as well. (default False)

    Returns
    -------
    bool
        True if audio is present; otherwise False.
    '''
    directory = sp.utils.string2pathlib(directory)
    pattern = '**/*' if recursive else '*'
    audio_extensions = sp.files.list_possibleformats(use_scipy=False)
    # any() stops at the first hit, so large directories are not fully scanned
    return any(item.is_file() and item.suffix in audio_extensions
               for item in directory.glob(pattern))
def collect_audiofiles(directory, hidden_files = False, wav_only=False, recursive=False):
    '''Collects all audio files within a given directory.

    This includes the option to include hidden_files in the collection.

    Parameters
    ----------
    directory : str or pathlib.PosixPath
        The path to where desired files are located.
    hidden_files : bool
        If True, hidden files will be included. If False, they won't.
        (default False)
    wav_only : bool
        If True, only .wav files will be included. Otherwise, no limit
        on file type.
    recursive : bool
        If True, nested directories are searched as well. (default False)

    Returns
    -------
    paths_list : list of pathlib.PosixPath objects
        Sorted list of file pathways.
    '''
    directory = pathlib.Path(directory)
    # allow all data types to be collected (not only .wav)
    filetype = '*.wav' if wav_only else '*'
    if recursive:
        filetype = '**/' + filetype
    paths_list = list(directory.glob(filetype))
    # pathlib.glob collects hidden files as well - remove them if they are there:
    if not hidden_files:
        paths_list = [x for x in paths_list if not x.name.startswith('.')]
    # ensure only audiofiles:
    paths_list = sp.files.ensure_only_audiofiles(paths_list)
    # glob order is arbitrary; sort to deliver the documented sorted list
    return sorted(paths_list)
def collect_zipfiles(directory, hidden_files = False, ext='tgz', recursive=False):
    '''Collects all zipfiles within a given directory.

    This includes the option to include hidden_files in the collection.

    Parameters
    ----------
    directory : str or pathlib.PosixPath
        The path to where desired files are located.
    hidden_files : bool
        If True, hidden files will be included. If False, they won't.
        (default False)
    ext : str
        The extension of the archives to collect, with or without the
        leading dot. (default 'tgz')
    recursive : bool
        If True, nested directories are searched as well. (default False)

    Returns
    -------
    paths_list : list of pathlib.PosixPath objects
        Sorted list of file pathways.

    Raises
    ------
    ValueError
        If `ext` is empty.
    '''
    if not ext:
        raise ValueError('Function collect_zipfiles expected a non-empty '+\
            'extension, for example "tgz".')
    directory = pathlib.Path(directory)
    filetype = '*' + ext if ext.startswith('.') else '*.' + ext
    if recursive:
        filetype = '**/' + filetype
    paths_list = list(directory.glob(filetype))
    # pathlib.glob collects hidden files as well - remove them if they are there:
    if not hidden_files:
        paths_list = [x for x in paths_list if not x.name.startswith('.')]
    # glob order is arbitrary; sort to deliver the documented sorted list
    return sorted(paths_list)
def ensure_only_audiofiles(audiolist):
    '''Filters out entries whose extension is not a loadable audio format.

    Parameters
    ----------
    audiolist : list
        Pathways (anything accepted by pathlib.Path) to be checked.

    Returns
    -------
    audiolist_checked : list
        The entries of `audiolist`, in their original order, whose suffix is
        listed by soundpy.files.list_possibleformats(use_scipy=False). A
        warning reporting the number of dropped entries is issued if any
        entry was removed.
    '''
    import warnings
    accepted = sp.files.list_possibleformats(use_scipy=False)
    kept = []
    for entry in audiolist:
        if pathlib.Path(entry).suffix in accepted:
            kept.append(entry)
    num_dropped = len(audiolist) - len(kept)
    if num_dropped > 0:
        warnings.warn(
            'Some files did not match those acceptable by this program. '
            '(i.e. non-audio files) The number of files not included: '
            '{}'.format(num_dropped))
    return kept
def prep4scipywavfile(filename, overwrite=False):
    '''Takes soundfile and saves it in a format compatible with scipy.io.wavfile

    The file is first converted to .wav if it has another extension; if it
    still cannot be read and 'bitdepth' is not yet part of its name, its bit
    depth is changed. This is repeated until scipy.io.wavfile.read succeeds.

    Parameters
    ----------
    filename : str
        Filename of the soundfile to load with scipy.io.wavfile
    overwrite : bool
        Passed on to convert_audiofile and newbitdepth. (default False)

    Returns
    -------
    filename : str
        Filename of the soundfile compatible with scipy.io.wavfile

    Raises
    ------
    ValueError
        If the file is a .wav file with 'bitdepth' in its name and still
        cannot be read.
    '''
    while True:
        try:
            read(filename)
        except ValueError:
            is_wav = pathlib.Path(filename).suffix.lower() == '.wav'
            if not is_wav:
                print("Converting file to .wav")
                filename = convert_audiofile(filename, overwrite=overwrite)
                print("Saved file as {}".format(filename))
            elif 'bitdepth' not in str(filename):
                print("Ensuring bitdepth is compatible with scipy library")
                filename = newbitdepth(filename, overwrite=overwrite)
                print("Saved file as {}".format(filename))
            else:
                # some other error
                raise
        else:
            return filename
def convert_audiofile(filename, format_type=None, sr=None, new_dir=False, overwrite=False, use_scipy=False, **kwargs):
    '''Converts and saves soundfile as .wav (or another format) in same or new directory.

    Parameters
    ----------
    filename : str or pathlib.PosixPath
        The filename of the audiofile to be converted
    format_type : str
        The format to convert the audio file to. See soundpy.files.conversion_formats.
        (defaults to 'wav')
    sr : int, optional
        The sample rate to be applied to the signal. If none supplied, the sample rate
        of the original file will be used.
    new_dir : str, pathlib.PosixPath, optional
        If False, the converted files will be saved in same directory as originals.
        If a path is provided, the converted files will be saved there. If no such directory
        exists, one will be created. A path with an extension is treated as the
        full name of the new file.
    overwrite : bool
        If False and the converted file already exists, a warning is issued
        and nothing is written. (default False)
    use_scipy : bool
        Passed on to soundpy.savesound. (default False)
    **kwargs : additional keyword arguments
        The keyword arguments for soundfile.write:
        https://pysoundfile.readthedocs.io/en/latest/index.html?highlight=write#soundfile.write
        A `format` entry is replaced by `format_type`.

    Returns
    -------
    f_wavfile : pathlib.PosixPath
        The filename / path where the audio file is saved.

    Raises
    ------
    TypeError
        If `filename` is not path-like, has no extension, or has an
        extension this software cannot load.
    FileNotFoundError
        If `filename` does not exist.

    Examples
    --------
    >>> audiofile = './example/audio.wav'
    # in same directory
    >>> audiofile_flac = sp.files.convert_audiofile(audiofile, format_type='flac')
    >>> audiofile_flac
    PosixPath('example/audio.flac')
    # in new directory
    >>> audiofile_flac = sp.files.convert_audiofile(audiofile, format_type='flac',
                                                     new_dir = './examples2/')
    >>> audiofile_flac
    PosixPath('examples2/audio.flac')
    >>> # can establish desired conversion format in `new_dir`
    >>> audiofile_ogg = sp.files.convert_audiofile(audiofile,
                                                    new_dir = './examples2/audio.ogg')
    >>> audiofile_ogg
    PosixPath('examples2/audio.ogg')

    See Also
    --------
    soundpy.files.conversion_formats
        Lists the possible formats to convert audio files.
    soundpy.files.list_possibleformats
        Lists the possible formats to load with soundpy.loadsound
    '''
    import warnings
    try:
        f = pathlib.Path(filename)
    except TypeError as e:
        raise TypeError('Function convert_audiofile expected input of type string '+\
            'or a pathlib object, not type {}.'.format(type(filename))) from e
    if not f.suffix:
        raise TypeError('Function convert_audiofile expected a path with an '+\
            'audio extension, not input: \n{}'.format(filename))
    if f.suffix not in sp.files.list_possibleformats(use_scipy=False):
        raise TypeError('This software cannot process audio in {}'.format(f.suffix)+\
            ' format. We apologize for the inconvenience.')
    # ensure filename exists:
    if not os.path.exists(filename):
        raise FileNotFoundError('Could not find audio file at the following '+\
            'location\n{}'.format(filename))
    # set format_type to match suffix of new pathway (if it is not set already)
    if new_dir:
        new_dir = pathlib.Path(new_dir)
        if new_dir.suffix and format_type is None:
            format_type = new_dir.suffix[1:].upper()
    # set default if format_type is None
    if format_type is None:
        format_type = 'WAV'
    # establish the path to save updated file.
    if new_dir:
        # check if new_dir is a directory or filename
        if new_dir.suffix:
            new_filename = new_dir
            new_dir = new_dir.parent
            # check for mismatch in file type and file name
            if format_type.lower() != new_filename.suffix[1:].lower():
                message = '\nWARNING: `format_type` {} '.format(format_type) +\
                    'does not match format of `new_dir`: \n{}.\n'.format(new_filename) + \
                    'Saving according to the `format_type`.'
                warnings.warn(message)
                new_filename = replace_ext(new_filename, format_type.lower())
        else:
            new_f = replace_ext(f, format_type.lower())
            # change filename to new directory
            new_filename = new_dir.joinpath(new_f.name)
        # check to make sure new_dir exists
        new_dir = sp.utils.check_dir(new_dir, make=True)
    else:
        new_filename = replace_ext(f, format_type.lower())
    # Nothing will be written in this case, so return before decoding audio.
    if not overwrite and os.path.exists(new_filename):
        message = '\nWARNING: File {} already exists at this '.format(new_filename)+\
            'location. Set `overwrite` to True to overwrite it. Not overwritten.'
        warnings.warn(message)
        return new_filename
    # load audio samples with soundfile, then save them as new audio file.
    # soundfile.read only accepts `samplerate` for RAW files, so the file is
    # read at its native rate and resampled afterwards if required.
    try:
        data, sr_loaded = sf.read(filename)
    except RuntimeError:
        data, sr_loaded = librosa.load(filename, sr=sr)
    if sr is not None and sr_loaded != sr:
        # NOTE(review): same call as in loadsound; confirm resample_audio
        # handles multi-channel data of shape (num_samples, num_channels).
        data, sr_loaded = sp.dsp.resample_audio(data,
                                                sr_original = sr_loaded,
                                                sr_desired = sr)
    # the requested conversion format always wins over a `format` in kwargs
    kwargs['format'] = format_type.upper()
    new_filename = sp.savesound(new_filename, data, sr_loaded,
                                overwrite=overwrite, use_scipy=use_scipy,
                                **kwargs)
    return new_filename
def replace_ext(filename, extension):
    '''Swaps the extension of a filename, or appends one if it has none.

    Parameters
    ----------
    filename : str or pathlib.PosixPath
        Filename with the missing or incorrect extension
    extension : str
        The correct extension for the given filename, with or without
        the leading dot.

    Returns
    -------
    file_newext : pathlib.PosixPath
        The filename with the new extension
    '''
    path = pathlib.Path(filename) if isinstance(filename, str) else filename
    # normalise the extension so it always carries its leading dot
    suffix = extension if extension[0] == '.' else '.' + extension
    return path.parent / (path.stem + suffix)
def match_ext(filename1, filename2):
    '''Gives both filenames the same file extension.

    The extension of `filename1` is used when it has one; otherwise that of
    `filename2`.

    Parameters
    ----------
    filename1 : str or pathlib.PosixPath
    filename2 : str or pathlib.PosixPath

    Returns
    -------
    f1, f2 : str or pathlib.PosixPath
        Each filename is returned as a str if its extension already matched,
        or as the result of replace_ext if it had to be changed.

    Raises
    ------
    TypeError
        If neither filename has an extension.
    '''
    first = pathlib.Path(filename1)
    second = pathlib.Path(filename2)
    # filename1's suffix takes priority; fall back on filename2's
    extension = first.suffix or second.suffix
    if not extension:
        raise TypeError('No file extension provided. Check the filenames.')
    first = str(first) if first.suffix == extension else replace_ext(first, extension)
    second = str(second) if second.suffix == extension else replace_ext(second, extension)
    return first, second
def newbitdepth(wave, bitdepth=16, newname=None, overwrite=False):
    '''Convert bitdepth to 16 or 32, to ensure compatibility with scipy.io.wavfile

    Scipy.io.wavfile is easily used online, for example in Jupyter notebooks.

    Parameters
    ----------
    wave : str or pathlib.PosixPath
        The audio file whose bit depth should be changed.
    bitdepth : int
        The new bit depth: 16 or 32. (default 16)
    newname : str or pathlib.PosixPath, optional
        Where to save the converted audio. If not provided, `wave` itself is
        used when `overwrite` is True; otherwise '_bitdepth<bitdepth>' is
        added to the name of `wave` and the file is saved in the same
        directory as `wave`.
    overwrite : bool
        If True and no `newname` is given, `wave` is overwritten.
        (default False)

    Returns
    -------
    savedname : str or pathlib.PosixPath
        The pathway the converted audio was saved under.

    Raises
    ------
    ValueError
        If `bitdepth` is neither 16 nor 32.

    Reference
    ---------
    https://stackoverflow.com/questions/44812553/how-to-convert-a-24-bit-wav-file-to-16-or-32-bit-files-in-python3
    '''
    if bitdepth == 16:
        newbit = 'PCM_16'
    elif bitdepth == 32:
        newbit = 'PCM_32'
    else:
        raise ValueError('Provided bitdepth is not an option. Available bit depths: 16, 32')
    data, sr = sf.read(wave)
    if overwrite:
        if newname is None:
            newname = wave
    elif not newname:
        # adjustname returns the bare file name only; join it with the
        # directory of `wave` so the copy is not written to the current
        # working directory.
        adjusted = adjustname(wave, adjustment='_bitdepth{}'.format(bitdepth))
        newname = str(pathlib.Path(wave).parent.joinpath(adjusted))
        print("No new filename provided. Saved file as '{}'".format(newname))
    else:
        # make sure new extension matches original extension
        wave, newname = match_ext(wave, newname)
    sf.write(newname, data, sr, subtype=newbit)
    return newname
def adjustname(filename, adjustment=None):
    '''Inserts a tag between the stem and the extension of a filename.

    Parameters
    ----------
    filename : str
        The filename to be adjusted
    adjustment : str, optional
        The adjustment to add to the filename. If None (or empty),
        the string '_adj' will be added.

    Returns
    -------
    fname : str
        The adjusted filename with the original extension. Only the final
        path component is returned; any directory part is dropped.

    Examples
    --------
    >>> adjustname('happy.md')
    'happy_adj.md'
    >>> adjustname('happy.md', '_not_sad')
    'happy_not_sad.md'
    '''
    path = pathlib.Path(filename)
    tag = adjustment if adjustment else '_adj'
    return ''.join((path.stem, tag, path.suffix))
def delete_dir_contents(directory, remove_dir = False):
    '''Deletes everything inside a directory, optionally the directory too.

    Nested directories are emptied and removed. Symbolic links are unlinked
    without following them, so the contents of a linked directory are left
    untouched.

    Parameters
    ----------
    directory : str or pathlib.PosixPath
        The directory whose contents should be deleted.
    remove_dir : bool
        If True, `directory` itself is removed once it is empty.
        (default False)

    Returns
    -------
    None

    Reference
    ---------
    https://stackoverflow.com/a/28834214
    '''
    d = pathlib.Path(directory)
    # materialise the listing first: entries are deleted while looping
    for sub in list(d.iterdir()):
        if sub.is_dir() and not sub.is_symlink():
            # sub-directories are part of the contents, so remove them too;
            # otherwise `d.rmdir()` below fails on a non-empty directory
            delete_dir_contents(sub, remove_dir=True)
        else:
            sub.unlink()
    if remove_dir:
        d.rmdir()
def matching_filenames(list1, list_of_lists):
    '''Finds the files of `list1` whose name prefix also occurs in other lists.

    The prefix of a file is its stem up to (not including) the last '-'.
    A file of `list1` is reported if that prefix is a substring of any
    pathway found in `list_of_lists`.

    Parameters
    ----------
    list1 : list
        Either pathways (str or pathlib objects) or tuple pairs of
        (encoded_label, pathway). The type of the first entry decides how
        the whole list is read.
    list_of_lists : list of lists
        Lists with the same layout as `list1` to compare against.

    Returns
    -------
    contaminated_files : list of pathlib.PosixPath objects
        The pathways of `list1` that matched, in their original order.

    Raises
    ------
    ValueError
        If a list holds tuples and one of them is not of length 2.
    '''
    list1_files = _pathways_in(list1)
    # every pathway of the other lists as one flat list of strings, so that
    # the substring test below works for str and pathlib entries alike
    other_lists_files = []
    for sublist in list_of_lists:
        other_lists_files.extend(str(path) for path in _pathways_in(sublist))
    contaminated_files = []
    for item in list1_files:
        item = pathlib.Path(item)
        # NOTE(review): a stem without '-' gives an empty prefix, which is a
        # substring of every pathway - confirm this is intended.
        fname_head = '-'.join(item.stem.split('-')[:-1])
        if any(fname_head in other for other in other_lists_files):
            contaminated_files.append(item)
    return contaminated_files


def _pathways_in(entries):
    '''Returns the pathways of a list of pathways or of (label, pathway) pairs.'''
    if not entries:
        return []
    first = entries[0]
    if isinstance(first, tuple):
        pathways = []
        for item in entries:
            if len(item) != 2:
                # ensures expected length of 2: (encoded_label, pathway)
                raise ValueError('Expected a list of tuple pairs: encoded '+\
                    'label and associated pathway. Received tuple of '+\
                    'length {}'.format(len(item)))
            # only pairs whose encoded label comes first are used
            label = item[0]
            if isinstance(label, int) or (isinstance(label, str) and label.isdigit()):
                pathways.append(item[1])
        return pathways
    if isinstance(first, (str, pathlib.PurePath)):
        return list(entries)
    return []
def remove_contaminated_files(list1, contaminated_files):
    '''Removes the entries of `list1` that refer to contaminated files.

    Parameters
    ----------
    list1 : list
        Either pathways (str or pathlib objects) or tuple pairs of
        (encoded_label, pathway).
    contaminated_files : list
        Pathways (str or pathlib objects) to be removed from `list1`.

    Returns
    -------
    list_uncont : list
        A new list with the entries of `list1`, in their original order,
        whose pathway is not in `contaminated_files`.
    '''
    # Compare through pathlib so that a str entry such as './a.wav' matches
    # the pathlib object 'a.wav'; a set keeps each lookup constant time.
    contaminated = {str(pathlib.Path(x)) for x in contaminated_files}
    list_uncont = []
    for entry in list1:
        pathway = entry[1] if isinstance(entry, tuple) else entry
        if str(pathlib.Path(pathway)) not in contaminated:
            list_uncont.append(entry)
    return list_uncont