'''Utils module covers functions that are useful for soundpy but are not
directly related to sound data.
'''
import os, sys
import csv
import numpy as np
import datetime
import pathlib
import soundfile as sf
# for converting string lists back into list:
import ast
import os, sys
import inspect
# Locate the directory holding this file, then that directory's parent.
currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
packagedir = os.path.dirname(currentdir)
# Put the parent directory first on the module search path; presumably so the
# `import soundpy` that follows resolves to this source tree -- TODO confirm.
sys.path.insert(0, packagedir)
import soundpy as sp
# TODO: convert str paths into pathlib.Path objects
def path_or_samples(input_value):
    '''Checks whether `input_value` is a path or sample data.

    Does not check path validity. This is useful for functions that take
    both pathways to audio as well as pre-loaded audio data.

    Parameters
    ----------
    input_value : str, pathlib.PurePath, np.ndarray [size = (samples, )], or tuple [size = ((samples,), sr)]
        Any pathlib path flavour (posix or windows, pure or concrete) counts
        as a path. A tuple counts as samples only if its first item is a
        numpy array.

    Returns
    -------
    'path' or 'samples' : str

    Raises
    ------
    TypeError
        If `input_value` is none of the supported types. This includes an
        empty tuple and a tuple whose first item is not a numpy array.

    Examples
    --------
    >>> import numpy as np
    >>> # create some example samples and sample rate
    >>> samples = np.array([1,2,3,2,1,0])
    >>> sr = 5
    >>> path_or_samples( (samples, sr) )
    'samples'
    >>> # samples without a sample rate are accepted as well
    >>> path_or_samples(samples)
    'samples'
    >>> # create example string pathway
    >>> path_or_samples('my_audio.wav')
    'path'
    >>> # create pathlib object
    >>> import pathlib
    >>> path_or_samples(pathlib.Path('my_audio.wav'))
    'path'
    '''
    # PurePath is the common base class of every pathlib path type, so this
    # also recognises WindowsPath and the Pure* variants.
    if isinstance(input_value, (str, pathlib.PurePath)):
        return 'path'
    if isinstance(input_value, np.ndarray):
        return 'samples'
    # The truthiness test guards the [0] lookup against an empty tuple.
    if (isinstance(input_value, tuple) and input_value
            and isinstance(input_value[0], np.ndarray)):
        return 'samples'
    raise TypeError('The input for `path_or_samples` expected a str, '+\
        'pathlib.PosixPath, np.ndarray, or tuple with samples and sample rate, '+\
        'not type {}'.format(type(input_value)))
def get_default_args(func):
    '''Collects the default values declared in the signature of `func`.

    Parameters
    ----------
    func : callable
        Any object accepted by `inspect.signature`.

    Returns
    -------
    defaults : dict
        Parameter names mapped to their default values. Parameters that
        have no default are left out.

    References
    ----------
    stackoverflow answer by mgilson:
    link: https://stackoverflow.com/a/12627202
    license: https://creativecommons.org/licenses/by-sa/3.0/
    '''
    no_default = inspect.Parameter.empty
    defaults = {}
    for name, parameter in inspect.signature(func).parameters.items():
        if parameter.default is not no_default:
            defaults[name] = parameter.default
    return defaults
def match_dtype(array1, array2):
    '''Returns `array1` converted to the dtype of `array2`.

    Parameters
    ----------
    array1 : np.ndarray
        The numpy array with the datatype to be adjusted and returned.
    array2 : np.ndarray
        The numpy array with the original or desired datatype.

    Returns
    -------
    array1 : np.ndarray
        A converted copy of `array1` carrying the dtype of `array2`. The
        array passed in is not modified.
    '''
    # `astype` yields the requested dtype or raises, so no extra check of
    # the resulting dtype is needed.
    return array1.astype(array2.dtype)
def get_date():
    '''Builds a timestamp string from the current local date and time.

    The string holds month, day, hour, minute, second and millisecond, in
    that order, without zero padding and without the year. This is useful
    for creating a unique filename.

    Parameters
    ----------
    None

    Returns
    -------
    time_str : str
        A string containing the date and time.

    Examples
    --------
    >>> date = get_date()
    >>> date
    '6m18d1h16m32s295ms'
    '''
    now = datetime.datetime.now()
    # microseconds -> whole milliseconds
    milliseconds = int(now.microsecond*0.001)
    fields = (now.month, now.day, now.hour, now.minute, now.second,
              milliseconds)
    return "{}m{}d{}h{}m{}s{}ms".format(*fields)
def check_dir(directory, make=True, append=True):
    '''Checks if directory exists and creates it if indicated.

    Parameters
    ----------
    directory : str or pathlib.PosixPath
        The directory of interest
    make : bool
        Whether or not the directory should be created or just checked to
        ensure it exists. (default True)
    append : bool
        If True, if a directory with the same name exists, new items will be
        saved into the old directory. Otherwise, an error will be raised.
        (default True)

    Returns
    -------
    directory : pathlib.Path
        If a directory could be created or confirmed to exist, the directory
        path will be returned. Otherwise Errors will be raised.

    Raises
    ------
    TypeError
        If the last path component has an extension (suffix).
    FileNotFoundError
        If the directory does not exist and `make` is False.
    FileExistsError
        If the directory exists and `append` is False.
    NotADirectoryError
        If the path exists but is not a directory.
    PermissionError
        If the directory cannot be inspected or created.
    '''
    directory = pathlib.Path(directory)
    # check to ensure the pathway does not have an extension
    if directory.suffix:
        raise TypeError('Expected pathway without extension. Did you mean to set \n~ '
                        + str(directory) + ' ~\nas a directory? If so, remove extension.')
    try:
        if directory.is_dir():
            if not append:
                raise FileExistsError('The following directory already exists: '+\
                    '\n{}'.format(directory)+'\nTo write into this directory, '+\
                    'set `append` to True.')
        elif directory.exists():
            # e.g. a regular file without extension: nothing can be saved
            # "into" it, so do not report it as a usable directory.
            raise NotADirectoryError('The following path exists but is not '+\
                'a directory: \n{}'.format(directory))
        elif make:
            # Creates missing parents too; exist_ok tolerates the directory
            # appearing between the check above and this call.
            directory.mkdir(parents=True, exist_ok=True)
        else:
            raise FileNotFoundError('The following directory does not exist: '+\
                '\n{}'.format(directory))
    except PermissionError as error:
        raise PermissionError('Problem reading file. Check to ensure the path '+\
            'does not start with a slash: {}'.format(directory)) from error
    return directory
def create_nested_dirs(directory):
    '''Creates directory even if several parent directories don't exist.

    Parameters
    ----------
    directory : str, pathlib.PosixPath
        The directory to be created.

    Returns
    -------
    directory : pathlib.Path
        If successful, the directory path that has been created.

    Raises
    ------
    FileExistsError
        If `directory` itself already exists.

    Examples
    --------
    >>> # First an unsuccessful creation of nested directory
    >>> import os
    >>> new_dir = './testdir/testdir/testdir/'
    >>> os.mkdir(new_dir)
    Traceback (most recent call last):
        ...
    FileNotFoundError: [Errno 2] No such file or directory: './testdir/testdir/testdir/'
    >>> # try again with create_nested_dirs()
    >>> directory = create_nested_dirs(new_dir)
    >>> directory
    PosixPath('testdir/testdir/testdir')
    '''
    directory = pathlib.Path(directory)
    # parents=True creates every missing ancestor. exist_ok is left at its
    # default (False), so an existing leaf directory still raises.
    directory.mkdir(parents=True)
    return directory
def string2pathlib(pathway_string):
    '''Returns `pathway_string` as a pathlib object.

    Input that already is a pathlib.PosixPath is handed back untouched;
    anything else is passed to `pathlib.Path`.

    Parameters
    ----------
    pathway_string : str or pathlib.PosixPath
        The pathway to be turned into a pathlib object, if need be.

    Returns
    -------
    pathway_string : pathlib.PosixPath
        The pathway as a pathlib object.

    Raises
    ------
    TypeError
        If `pathlib.Path` rejects the type of `pathway_string`.

    Examples
    --------
    >>> pathway = 'folder/way2go.txt'
    >>> pathlib_pathway = string2pathlib(pathway)
    >>> pathlib_pathway
    PosixPath('folder/way2go.txt')
    '''
    if isinstance(pathway_string, pathlib.PosixPath):
        return pathway_string
    try:
        return pathlib.Path(pathway_string)
    except TypeError:
        raise TypeError('Function string2pathlib expects a string or '+\
            'pathlib object, not input of type {}'.format(type(pathway_string)))
# Placeholder tag that stands in for a pathlib constructor call while the
# value string is parsed with ast.literal_eval.
_PATH_MARKER = '__soundpy_pathlib_path__'


def _revive_paths(obj):
    '''Rebuilds pathlib.Path objects from the `(_PATH_MARKER, str)` tuples.

    Lists, tuples, sets and dicts are walked recursively; every other
    object is returned unchanged.
    '''
    if isinstance(obj, tuple):
        if len(obj) == 2 and obj[0] == _PATH_MARKER and isinstance(obj[1], str):
            return pathlib.Path(obj[1])
        return tuple(_revive_paths(item) for item in obj)
    if isinstance(obj, list):
        return [_revive_paths(item) for item in obj]
    if isinstance(obj, set):
        return {_revive_paths(item) for item in obj}
    if isinstance(obj, dict):
        return {_revive_paths(key): _revive_paths(value)
                for key, value in obj.items()}
    return obj


def restore_dictvalue(value_string):
    '''Takes dict value and converts it to its original type.

    When loading a dictionary from a .csv file, the values are strings.
    Anything `ast.literal_eval` understands (integers, floats, tuples,
    lists, quoted strings, ...) is converted directly. Values holding
    pathlib reprs, e.g. "[PosixPath('a.wav')]", are handled as well: every
    `<Name>Path('...')` occurrence becomes a pathlib.Path object.

    Warning: no extensive testing has been completed for this function.
    It might not handle all value types as expected.

    Parameters
    ----------
    value_string : str
        The dictionary value that was converted into a string object.
        Non-string input is returned unchanged.

    Returns
    -------
    value_original_type : list, int, tuple, string, float, etc.
        The value converted back to its original type. Strings that cannot
        be interpreted are returned as they are.

    Examples
    --------
    >>> restore_dictvalue('(1, 2.5)')
    (1, 2.5)
    >>> restore_dictvalue('some words')
    'some words'
    >>> input_string = "[PosixPath('data/audio/vacuum/vacuum1.wav')]"
    >>> typelist = restore_dictvalue(input_string)
    >>> typelist
    [PosixPath('data/audio/vacuum/vacuum1.wav')]
    >>> # Example with a list of tuples, i.e. label and audio file pairs:
    >>> input_string = "[(2, PosixPath('vacuum1.wav')), (1, PosixPath('vacuum2.wav'))]"
    >>> labelaudio_pairs = restore_dictvalue(input_string)
    >>> labelaudio_pairs
    [(2, PosixPath('vacuum1.wav')), (1, PosixPath('vacuum2.wav'))]
    >>> type(labelaudio_pairs[0][0])
    <class 'int'>
    '''
    # local import: `re` is not among this module's top-level imports
    import re

    # only works with type string data
    if not isinstance(value_string, str):
        return value_string
    try:
        # first use reliable module to turn string into original type
        return ast.literal_eval(value_string)
    except SyntaxError:
        # most likely a string with spaces or something; can stay string
        return str(value_string)
    except ValueError:
        # Either a bare word / plain path string, or a structure holding
        # pathlib reprs, which literal_eval refuses to evaluate.
        pass

    # Match e.g. PosixPath('x'), PurePosixPath('x'), WindowsPath("x"),
    # capturing the quoted string literal (escapes included).
    single_quoted = r"'(?:[^'\\]|\\.)*'"
    double_quoted = r'"(?:[^"\\]|\\.)*"'
    path_repr = r'\b\w*Path\((' + single_quoted + '|' + double_quoted + r')\)'

    # Swap each constructor call for a literal tuple that literal_eval can
    # evaluate; _revive_paths turns those tuples into Path objects.
    literal, n_paths = re.subn(
        path_repr,
        lambda match: '({!r}, {})'.format(_PATH_MARKER, match.group(1)),
        value_string)
    if not n_paths:
        # no pathlib repr present: just a string, even if it contains 'Path'
        return value_string
    try:
        parsed = ast.literal_eval(literal)
    except (SyntaxError, ValueError):
        # still not a literal structure; leave the value as a string
        return value_string
    return _revive_paths(parsed)
def adjust_time_units(time_sec):
    '''Expresses a duration given in seconds in a better suited unit.

    Durations of an hour or more are reported in hours, durations of a
    minute or more in minutes, and everything else stays in seconds.

    Parameters
    ----------
    time_sec : int, float
        The amount of time measured in seconds.

    Returns
    -------
    total_time : int, float
        The total amount of time
    units : str
        The units of `total_time`: 'seconds', 'minutes', or 'hours'.

    Examples
    --------
    >>> adjust_time_units(5)
    (5, 'seconds')
    >>> adjust_time_units(500)
    (8.333333333333334, 'minutes')
    >>> adjust_time_units(5000)
    (1.3888888888888888, 'hours')
    '''
    # Largest unit first, so each check only needs a lower bound.
    if time_sec >= 3600:
        return time_sec / 3600, 'hours'
    if time_sec >= 60:
        return time_sec / 60, 'minutes'
    return time_sec, 'seconds'
def print_progress(iteration, total_iterations, task = None):
    '''Prints the status of a process based on iteration number.

    Assumes the iteration starts at 0 rather than 1. The line is written
    to stdout with a leading carriage return and no newline, then stdout
    is flushed.

    Parameters
    ----------
    iteration : int
        The iteration of the current process.
    total_iterations : int
        The total iterations to be completed.
    task : str, optional
        The relevant task of the process. If empty or None, the text
        'current task' is used.

    Returns
    -------
    None

    Raises
    ------
    ZeroDivisionError
        If `total_iterations` is 0.

    Examples
    --------
    >>> print_progress(4, 10)
    50% through current task
    >>> print_progress(4, 10, task = 'testing')
    50% through testing
    '''
    # Multiply before dividing so whole percentages are not lost to float
    # rounding (29 / 100 * 100 evaluates to 28.999999999999996).
    percent = int((iteration + 1) * 100 / total_iterations)
    label = task if task else 'current task'
    # One str.format call: a '%' inside `task` is plain text here.
    sys.stdout.write('\r{}% through {}'.format(percent, label))
    sys.stdout.flush()
def check_noisy_clean_match(noisyfilename, cleanfilename):
    '''Tells whether the clean file's name is contained in the noisy one's.

    Only the base names with their final extension removed are compared.
    This may be helpful to check that two audiofile datasets (a noisy and
    clean dataset) are aligned.

    Parameters
    ----------
    noisyfilename : str or pathlib.PosixPath
        Path or name of the noisy audio file.
    cleanfilename : str or pathlib.PosixPath
        Path or name of the clean audio file.

    Returns
    -------
    bool : True if the clean name occurs in the noisy name, False if not.
        In the False case a short message is printed as well.
    '''
    clean_stem, noisy_stem = (
        os.path.splitext(os.path.basename(name))[0]
        for name in (cleanfilename, noisyfilename))
    matched = clean_stem in noisy_stem
    if not matched:
        print('{} is not in {}.'.format(clean_stem, noisy_stem))
    return matched
def audiofile_length_match(filename1, filename2):
    '''Checks that two audiofiles have the same length.

    This may be useful if you have clean and noisy audiofiles that
    should be the same length. If the sample rates differ, the second
    file is resampled to the sample rate of the first before the lengths
    are compared.

    Parameters
    ----------
    filename1 : str or pathlib.PosixPath
        The path to first audio file.
    filename2 : str or pathlib.PosixPath
        The path to second audio file.

    Returns
    -------
    bool : True if they match, False if not.

    Warning
    -------
    UserWarning
        If the sample rate of the audio files don't match.
    UserWarning
        If the length of the files don't match.
    '''
    # Imported once, up front, so both warning branches can use it.
    import warnings

    y1, sr1 = sp.loadsound(filename1)
    y2, sr2 = sp.loadsound(filename2)
    if sr1 != sr2:
        message = '\nWARNING: Sample rates do not match: '+\
            '\n{} has sr {}'.format(filename1, sr1)+\
            '\n{} has sr {}.'.format(filename2, sr2)
        warnings.warn(message)
        y2, sr2 = sp.dsp.resample_audio(y2, sr_original = sr2, sr_desired = sr1)
        # sanity check on the sample rate reported by the resampler
        assert sr1 == sr2
    if len(y1) != len(y2):
        message = '\nWARNING: audiofile length mismatch.'+\
            '\nLength {}: {}'.format(filename1, len(y1))+\
            '\nLength {}: {}'.format(filename2, len(y2))
        warnings.warn(message)
        return False
    return True
def _paths_to_str(value):
    '''Returns `value` with pathlib objects replaced by their string form.

    Handles a single path, a list of paths, and lists/arrays nested one
    level inside a list. A new list is built; `value` is never modified.
    '''
    if isinstance(value, pathlib.PurePath):
        return str(value)
    if not isinstance(value, list):
        return value
    converted = []
    for item in value:
        if isinstance(item, pathlib.PurePath):
            item = str(item)
        elif isinstance(item, (list, np.ndarray)):
            # nested arrays become plain lists, like nested lists
            item = [str(k) if isinstance(k, pathlib.PurePath) else k
                    for k in item]
        converted.append(item)
    return converted


def save_dict(filename, dict2save, overwrite=False):
    '''Saves dictionary as csv file to indicated path and filename.

    Each key / value pair becomes one csv row. Pathlib objects are written
    as strings. The dictionary passed in is left unmodified.
    Warning: not thoroughly tested.

    Parameters
    ----------
    filename : str or pathlib.PosixPath
        The path and name to save the dictionary under. If '.csv'
        extension is not given, it is added.
    dict2save : dict
        The dictionary that is to be saved
    overwrite : bool, optional
        Whether or not the saved dictionary should overwrite a
        preexisting file (default False)

    Returns
    ----------
    path : pathlib.Path
        The path where the dictionary was saved

    Raises
    ------
    FileExistsError
        If the file already exists and `overwrite` is False.
    '''
    filename = pathlib.Path(filename)
    if not filename.name.endswith('.csv'):
        # append (rather than replace) so names containing dots stay intact
        filename = filename.with_name(filename.name + '.csv')
    if not overwrite and filename.exists():
        raise FileExistsError(
            'The file {} already exists at this path:\n{}'.format(
                filename.name, filename))
    # convert pathlib objects to string - otherwise they are hard to restore
    rows = [(key, _paths_to_str(value)) for key, value in dict2save.items()]
    # newline='' is what the csv module asks for; without it some platforms
    # get an extra blank line after every row.
    with open(filename, 'w', newline='') as f:
        csv.writer(f).writerows(rows)
    return filename
def _read_csv_dict(csv_path):
    '''Reads a two-column csv file into a dict (column 0 -> column 1).'''
    with open(csv_path, mode='r', newline='') as infile:
        # blank rows carry no key / value pair and are skipped
        return {row[0]: row[1] for row in csv.reader(infile) if row}


def _max_out_csv_field_limit():
    '''Sets the csv field size limit as high as accepted and returns it.'''
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return limit
        except OverflowError:
            # the value does not fit the underlying C integer; go smaller
            limit = limit // 10


def load_dict(csv_path):
    '''Loads a dictionary from csv file. Expands csv limit if too large.

    Increasing the csv limit helps if loading dictionaries with very long
    audio file path lists. For example, see soundpy.datasets.audio2datasets
    function.

    Parameters
    ----------
    csv_path : str, pathlib.PosixPath, or dict
        Path to the csv file. If a dictionary is given instead, it is
        returned as it is.

    Returns
    -------
    dict_prepped : dict
        Keys and values as read from the csv file; all of them strings.

    Raises
    ------
    csv.Error
        If the file still cannot be parsed after the field size limit has
        been raised.
    '''
    # if a dictionary is already loaded, simply return the dictionary
    if isinstance(csv_path, dict):
        return csv_path
    try:
        return _read_csv_dict(csv_path)
    except csv.Error:
        print('Dictionary values or size is too large.')
        print('Maxing out field size limit for loading this dictionary:')
        print(csv_path)
        print('\nThe new field size limit is:')
        # Note: this changes the limit for the whole process.
        print(_max_out_csv_field_limit())
    # Retry exactly once; if the failure was not about field size, the
    # csv.Error now propagates instead of recursing without end.
    return _read_csv_dict(csv_path)
if __name__ == '__main__':
    # Executed as a script: run the examples embedded in the docstrings.
    from doctest import testmod
    testmod()