hyperion.data
Classes:
- DataLoader – Iterator over a dataset yielding batches.
- SimpleDataset – Simple dataset that returns a tuple of arrays (inputs, outputs).
- StochasticLoader – Loader that yields batches of unique random indices.
- SubSet – Dataset subset.
Functions:
- create_random_split – Create a random split.
- downsample_ds – Return a downsampled subset of the dataset.
- randomize_ds – Randomize a dataset.
DataLoader
DataLoader(dataset, batch_size, rng, shuffle=False, infinite=False)
Bases: object
Iterator over a dataset yielding batches.
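| Parameters: | |
|---|---|
| dataset (object) | Dataset providing `__len__` and `__getitem__`. |
| batch_size (int) | Number of samples per batch. |
| rng (numpy.random.Generator) | Random number generator used for shuffling. |
| shuffle (bool, optional) | Whether to shuffle the dataset each epoch. Defaults to False. |
| infinite (bool, optional) | If True, the iterator yields batches indefinitely. Defaults to False. |

| Attributes: | |
|---|---|
| n_batches (property) | Number of batches per epoch. |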
Source code in hyperion/data.py
def __init__(self, dataset, batch_size, rng, shuffle=False, infinite=False):
    """Set up a loader that serves ``dataset`` in batches.

    Parameters
    ----------
    dataset : object
        Dataset providing ``__len__`` and ``__getitem__``.
    batch_size : int
        Number of samples per batch.
    rng : numpy.random.Generator
        Random number generator used for shuffling.
    shuffle : bool, optional
        Whether to shuffle the dataset each epoch.
    infinite : bool, optional
        If True, the iterator yields batches indefinitely.
    """
    self._dataset, self._batch_size = dataset, batch_size
    self._rng, self._shuffle, self._infinite = rng, shuffle, infinite

    # Round up so that a trailing, partially filled batch still counts.
    batches_per_epoch = np.ceil(len(dataset) / batch_size)
    self._n_batches = int(batches_per_epoch)
n_batches
property
n_batches
Number of batches per epoch.
SimpleDataset
SimpleDataset(*arrays)
Bases: object
Simple Dataset that returns a tuple of arrays (inputs, outputs).
| Parameters: |
|
|---|
| Attributes: |
|
|---|
| Parameters: |
|
|---|
Source code in hyperion/data.py
def __init__(self, *arrays):
    """Store one or more aligned arrays as a dataset.

    The dataset length is taken from the first array; every other array
    must report the same length.

    Parameters
    ----------
    *arrays : sequence of array-like
        Arrays to store; all arrays must have the same length.

    Raises
    ------
    ValueError
        If any array's length differs from that of the first array.
    """
    super(SimpleDataset, self).__init__()
    self._arrays = arrays
    self._len = len(arrays[0])
    if any(len(candidate) != self._len for candidate in arrays):
        raise ValueError("Inputs and outputs must have same length.")
StochasticLoader
StochasticLoader(dataset, batch_size, rng)
Bases: object
Loader that yields batches of unique random indices.
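| Parameters: | |
|---|---|
| dataset (object) | Dataset to sample from. |
| batch_size (int) | Size of each returned batch. |
| rng (numpy.random.Generator) | Random number generator. |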
Source code in hyperion/data.py
def __init__(self, dataset, batch_size, rng):
    """Record the sampling configuration of the stochastic loader.

    Parameters
    ----------
    dataset : object
        Dataset to sample from.
    batch_size : int
        Size of each returned batch.
    rng : numpy.random.Generator
        Random number generator.
    """
    self._dataset, self._batch_size, self._rng = dataset, batch_size, rng
SubSet
SubSet(dataset, subset_ix)
Bases: object
Dataset subset.
| Parameters: |
|
|---|
| Attributes: |
|
|---|
| Parameters: |
|
|---|
Source code in hyperion/data.py
def __init__(self, dataset, subset_ix):
    """Create a subset view of an existing dataset.

    Parameters
    ----------
    dataset : object
        Original dataset object supporting ``__len__`` and ``__getitem__``.
    subset_ix : sequence of int
        Indices included in the subset. May be empty.

    Raises
    ------
    RuntimeError
        If the largest index is greater than or equal to ``len(dataset)``.
    """
    super(SubSet, self).__init__()
    # Use ``len`` rather than truthiness: callers in this module pass NumPy
    # index arrays, whose truth value is ambiguous. An empty subset is
    # skipped here because ``max`` raises ValueError on an empty sequence.
    # Valid positions are 0 .. len(dataset) - 1, hence ``>=`` and not ``>``.
    if len(subset_ix) > 0 and max(subset_ix) >= len(dataset):
        raise RuntimeError("Invalid index")
    self._subset_ix = subset_ix
    self._len = len(subset_ix)
    self._dataset = dataset
create_random_split
create_random_split(dataset, split_len, rng)
Create a random split.
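| Parameters: | |
|---|---|
| dataset (object) | Dataset to split. |
| split_len (int) | Length of the first split. |
| rng (numpy.random.Generator) | Random number generator. |

| Returns: | |
|---|---|
| tuple | Tuple of (first_split, second_split) subsets. |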
Source code in hyperion/data.py
def create_random_split(dataset, split_len, rng):
    """Partition a dataset into two randomly chosen, disjoint subsets.

    All positions of ``dataset`` are shuffled in place with ``rng``; the
    first ``split_len`` shuffled positions form the first subset and the
    remaining positions form the second.

    Parameters
    ----------
    dataset : object
        Dataset to split.
    split_len : int
        Length of the first split.
    rng : numpy.random.Generator
        Random number generator.

    Returns
    -------
    tuple
        Tuple of (first_split, second_split) subsets.
    """
    order = np.arange(len(dataset))
    rng.shuffle(order)
    head, tail = order[:split_len], order[split_len:]
    return SubSet(dataset, head), SubSet(dataset, tail)
downsample_ds
downsample_ds(dataset, fraction, rng, copy=False)
Return a downsampled subset of the dataset.
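| Parameters: | |
|---|---|
| dataset (object) | Dataset to sample. |
| fraction (float) | Fraction of dataset to include (0 < fraction <= 1). |
| rng (numpy.random.Generator) | Random number generator. |
| copy (bool, optional) | If True, return an array slice of the dataset instead of a SubSet. Defaults to False. |

| Returns: | |
|---|---|
| SubSet or array-like | Downsampled dataset view or copy. |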
Source code in hyperion/data.py
def downsample_ds(dataset, fraction, rng, copy=False):
    """Pick a random fraction of a dataset.

    The number of kept samples is ``int(fraction * len(dataset))``, i.e. the
    product truncated toward zero.

    Parameters
    ----------
    dataset : object
        Dataset to sample.
    fraction : float
        Fraction of dataset to include (0 < fraction <= 1).
    rng : numpy.random.Generator
        Random number generator.
    copy : bool, optional
        If True, index ``dataset`` directly with the chosen positions and
        return the result instead of wrapping it in a SubSet.

    Returns
    -------
    SubSet or array-like
        Downsampled dataset view or copy.
    """
    n_total = len(dataset)
    n_keep = int(fraction * n_total)

    order = np.arange(n_total)
    rng.shuffle(order)
    kept = order[:n_keep]

    return dataset[kept] if copy else SubSet(dataset, kept)
randomize_ds
randomize_ds(dataset, rng)
Randomize a dataset.
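| Parameters: | |
|---|---|
| dataset (object) | Dataset to randomize. |
| rng (numpy.random.Generator) | Random number generator. |

| Returns: | |
|---|---|
| SubSet | Randomized subset of the dataset. |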
Source code in hyperion/data.py
def randomize_ds(dataset, rng):
    """Wrap a dataset in a SubSet whose indices are randomly permuted.

    Every position of ``dataset`` appears exactly once in the index array,
    which is shuffled in place with ``rng``.

    Parameters
    ----------
    dataset : object
        Dataset to randomize.
    rng : numpy.random.Generator
        Random number generator.

    Returns
    -------
    SubSet
        Randomized subset of the dataset.
    """
    order = np.arange(len(dataset))
    rng.shuffle(order)
    return SubSet(dataset, order)