# Source code for amen.synthesize

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Audio synthesis'''

import types
import pandas as pd
from scipy.sparse import lil_matrix

import librosa

from .audio import Audio
from .exceptions import SynthesizeError

def _format_inputs(inputs):
    """
    Organize inputs into an iterable of (TimeSlice, start_time) pairs,
    if they are not already of that form.

    Acceptable forms are:
        - A list of TimeSlices - the TimeSlices will be concatenated, each
          one starting where the previous one ends.
        - A generator that returns (TimeSlice, start_time).
        - A tuple of (TimeSlices, start_times).

    Parameters
    ----------
    inputs: generator, list, or tuple.

    Returns
    ------
    An iterable of (TimeSlice, start_time) pairs: a zip object for lists
    and tuples, or the generator itself, unchanged.

    Raises
    ------
    SynthesizeError
        If inputs is not a list, a tuple, or a generator.
    """
    # We may want to update this to support "properly" zipped lists of tuples.
    if isinstance(inputs, list):
        time_index = pd.to_timedelta(0.0, 's')
        timings = []
        for time_slice in inputs:
            # Record where this slice starts *before* advancing the running
            # offset; without this the zip below would pair nothing.
            timings.append(time_index)
            time_index = time_index + time_slice.duration

        return zip(inputs, timings)

    if isinstance(inputs, tuple):
        return zip(inputs[0], inputs[1])

    if isinstance(inputs, types.GeneratorType):
        return inputs

    raise SynthesizeError('Invalid synthesis timing format: {}'.format(inputs))

def synthesize(inputs):
    """
    Generate new Audio objects for output or further remixing.

    Parameters
    ----------
    inputs: generator, list, or tuple.
        See _format_inputs for details on parsing inputs.

    Returns
    ------
    An Audio object

    Raises
    ------
    SynthesizeError
        If any slice would end past the 20 minute output buffer.
    """
    # First we organize our inputs.
    inputs = _format_inputs(inputs)

    max_time = 0.0
    sample_rate = 44100
    array_length = 20 * 60  # 20 minutes!
    array_shape = (2, sample_rate * array_length)
    # One row per channel; sparse so the mostly-empty 20 minute buffer stays cheap.
    sparse_array = lil_matrix(array_shape)

    initial_offset = 0
    for i, (time_slice, start_time) in enumerate(inputs):
        # if we have a mono file, we return stereo here.
        resampled_audio, left_offset, right_offset = time_slice.get_samples()

        # set the initial offset, so we don't miss the start of the array
        if i == 0:
            initial_offset = max(left_offset * -1, right_offset * -1)

        # get the target start and duration, in seconds
        # NOTE(review): assumes both are pandas Timedelta-like objects whose
        # `.value` is a nanosecond count (hence the 1e-9) - confirm.
        start_time = start_time.value * 1e-9
        duration = time_slice.duration.value * 1e-9

        # find the max time
        if start_time + duration > max_time:
            max_time = start_time + duration

        # error if we'd go too far
        if start_time + duration > array_length:
            raise SynthesizeError("Amen can only synthesize up to 20 minutes of audio.")

        # get the target start and end samples
        starting_sample, _ = librosa.time_to_samples(
            [start_time, start_time + duration], sr=sample_rate
        )

        # figure out the actual starting and ending samples for each channel
        left_start = starting_sample + left_offset + initial_offset
        right_start = starting_sample + right_offset + initial_offset

        # add the data from each channel to the array
        sparse_array[0, left_start:left_start + len(resampled_audio[0])] += resampled_audio[0]
        sparse_array[1, right_start:right_start + len(resampled_audio[1])] += resampled_audio[1]

    # Trim the buffer down to the end of the latest-ending slice.
    max_samples = librosa.time_to_samples([max_time], sr=sample_rate)
    truncated_array = sparse_array[:, 0:max_samples[0]].toarray()
    return Audio(raw_samples=truncated_array, sample_rate=sample_rate)