In [44]:
experiment_dir='/home/vnfm/pyannote-audio/tutorials/models/speaker_embedding'
config_yml = experiment_dir + '/config.yml'
epochs = 2
db_yml = '~/.pyannote/db.yml'

In [45]:
from pyannote.database import get_protocol, FileFinder,get_database,get_databases
preprocessors = {'audio': FileFinder()}
protocol = get_protocol('AMI.SpeakerDiarization.MixHeadset',preprocessors=preprocessors)
from pyannote.audio.features.utils import get_audio_duration

In [46]:
from pyannote.audio.features.utils import get_audio_duration

In [47]:
import numpy as np
from pyannote.core import Segment
from pyannote.generators.fragment import random_segment
from pyannote.generators.fragment import random_subsegment
from pyannote.generators.batch import batchify, EndOfBatch
from pyannote.database.protocol import SpeakerDiarizationProtocol
class SpeechSegmentGenerator(object):
    """Generate batch of pure speech segments with associated speaker labels
    Parameters
    ----------
    feature_extraction : `pyannote.audio.features.FeatureExtraction`
        Feature extraction.
    per_label : int, optional
        Number of speech turns per speaker in each batch. Defaults to 3.
    label_min_duration : float, optional
        Remove speakers with less than `label_min_duration` seconds of speech.
        Defaults to 0 (i.e. keep it all).
    per_fold : int, optional
        Number of speakers in each batch. Defaults to all speakers.
    per_epoch : float, optional
        Number of days per epoch. Defaults to 7 (a week).
    duration : float, optional
        Use fixed duration segments with this `duration`.
        Defaults (None) to using variable duration segments.
    min_duration : float, optional
        In case `duration` is None, set segment minimum duration.
    max_duration : float, optional
        In case `duration` is None, set segment maximum duration.
    parallel : int, optional
        Number of prefetching background generators. Defaults to 1.
        Each generator will prefetch enough batches to cover a whole epoch.
        Set `parallel` to 0 to not use background generators.
    """

    def __init__(self, feature_extraction,
                 per_label=3, per_fold=None, per_epoch=7,
                 duration=None, min_duration=None, max_duration=None,
                 label_min_duration=0., parallel=1):

        super(SpeechSegmentGenerator, self).__init__()

        self.feature_extraction = feature_extraction
        self.per_label = per_label
        #self.per_label = 3
        self.per_fold = per_fold
        print(per_fold )
        self.per_epoch = per_epoch
        self.duration = duration
        self.parallel = parallel
        self.label_min_duration = label_min_duration

        if self.duration is None:
            self.min_duration = min_duration
            self.max_duration = max_duration
        else:
            self.min_duration = self.duration
            self.max_duration = self.duration

        self.min_duration_ = 0. if self.min_duration is None \
                                else self.min_duration
        print('min_duration_')
        print(self.min_duration_)#0.5
        self.weighted_ = True

    def initialize(self, protocol, subset='train'):

        self.data_ = {}
        databases = set()

        # loop once on all files
        for current_file in getattr(protocol, subset)():

            # keep track of database
            database = current_file['database']
            databases.add(database)

            # get annotation for current file
            annotation = current_file['annotation']

            # loop on each label in current file
            for label in annotation.labels():

                # get all segments with current label
                timeline = annotation.label_timeline(label)

                # remove segments shorter than min_duration (when provided)
                segments = [s for s in timeline
                              if s.duration > self.min_duration_]

                # corner case where no segment is long enough
                # and we removed them all...
                if not segments:
                    continue

                # total duration of label in current_file (after removal of
                # short segments).
                duration = sum(s.duration for s in segments)

                # store all these in data_ dictionary
                # datum = (segment_generator, duration, current_file, features)
                datum = (segments, duration, current_file)
                self.data_.setdefault(label, []).append(datum)
                #print(self.data_.setdefault)

        # remove labels with less than 'label_min_duration' of speech
        # otherwise those may generate the same segments over and over again
        dropped_labels = set()
        for label, data in self.data_.items():
            total_duration = sum(datum[1] for datum in data)
            if total_duration < self.label_min_duration:
                dropped_labels.add(label)

        for label in dropped_labels:
            self.data_.pop(label)

        self.labels_ = {label: i for i, label in enumerate(self.data_)}

        self.domains_ = {}
        self.domains_['database'] = {db: i for i, db in enumerate(databases)}

    def generator(self):
        print(dir(self.data_))
        labels = list(self.data_)
        while True:
            # shuffle labels
            np.random.shuffle(labels)
            # loop on each label
            for label in labels:
                # load data for this label
                segments, durations, files = zip(*self.data_[label])
                print('files[i]')
                #print(files)
                # choose 'per_label' files at random with probability
                # proportional to the total duration of 'label' in those files
                probabilities = durations / np.sum(durations)
                chosen = np.random.choice(len(files), size=self.per_label,
                                          p=probabilities)

                # loop on (randomly) chosen files
                for i in chosen:

                    # choose one segment at random with
                    # probability proportional to duration
                    # segment = next(segment_generators[i])
                    segment = next(
                        random_segment(segments[i], weighted=self.weighted_))

                    # choose sub-segment at random at exactly duration
                    #print(self.duration)
                    sub_segment = next(random_subsegment(segment, self.duration))
                    #print(dir(files[i]))
                    print(self.feature_extraction)
                    X = self.feature_extraction.crop(
                            current_file=files[i], segment=sub_segment, mode='center',
                            fixed=self.duration)
                    #print('X')
                    #print('label')
                    #print(label)
                    database = files[i]['database']
                    extra = {'label': label,
                             'database': database}

                    yield {'X': X,
                           'y': self.labels_[label],
                           'y_database': self.domains_['database'][database],
                           'extra': extra}

    @property
    def batch_size(self):
        if self.per_fold is not None:
            return self.per_label * self.per_fold
        return self.per_label * len(self.data_)

    @property
    def batches_per_epoch(self):

        # duration per epoch
        duration_per_epoch = self.per_epoch * 24 * 60 * 60
        print(self.per_epoch)
        # (average) duration per segment
        if self.duration is None:
            min_duration = 0. if self.min_duration is None \
                              else self.min_duration
            duration = .5 * (min_duration + self.max_duration)
        else:
            duration = self.duration

        # (average) duration per batch
        duration_per_batch = duration * self.batch_size

        # number of batches per epoch
        return int(np.ceil(duration_per_epoch / duration_per_batch))

    @property
    def n_classes(self):
        return len(self.data_)

    @property
    def labels(self):
        labels, _ = zip(*sorted(self.labels_.items(),
                                key=lambda x: x[1]))
        print('self.labels_.items()')
        print(self.labels_.items())
        return labels

    @property
    def signature(self):
        return {'X': {'@': (None, None)},
                'y': {'@': (None, np.stack)},
                'y_database': {'@': (None, np.stack)},
                'extra': {'label': {'@': (None, None)},
                         'database': {'@': (None, None)}}}

    def __call__(self, protocol, subset='train'):

        self.initialize(protocol, subset=subset)

        batch_size = self.batch_size
        print('batch_size')
        print(batch_size)
        batches_per_epoch = self.batches_per_epoch
        print('batches_per_epoch')
        print(batches_per_epoch)
        generators = []
        generator = self.generator()
        batches = batchify(generator, self.signature, batch_size=batch_size, prefetch=0)
        generators.append(batches)

        while True:
            # get `batches_per_epoch` batches from each generator
            for batches in generators:
                for _ in range(batches_per_epoch):
                    yield next(batches)

In [48]:
import yaml
import io
from pathlib import Path
from glob import glob
import numpy as np
from numpy.lib.format import open_memmap
from struct import unpack

from pyannote.core import SlidingWindow, SlidingWindowFeature
from pyannote.database.util import get_unique_identifier
from pyannote.audio.util import mkdir_p
class Precomputed(object):
    """Precomputed features

        Parameters
        ----------
        root_dir : `str`
            Path to directory where precomputed features are stored.
        use_memmap : `bool`, optional
            Defaults to True.
        sliding_window : `SlidingWindow`, optional
            Sliding window used for feature extraction. This is not used when
            `root_dir` already exists and contains `metadata.yml`.
            #在apply模式下，会传入sliding_window和dimension参数以及labels
        dimension : `int`, optional
            Dimension of feature vectors. This is not used when `root_dir` already
            exists and contains `metadata.yml`.
        labels : iterable, optional
            Human-readable name for each dimension.

        Notes
        -----
        If `root_dir` directory does not exist, one must provide both
        `sliding_window` and `dimension` parameters in order to create and
        populate file `root_dir/metadata.yml` when instantiating.

    """

    def get_path(self, item):
        uri = get_unique_identifier(item)
        path = '{root_dir}/{uri}.npy'.format(root_dir=self.root_dir, uri=uri)
        return path

    def __init__(self, root_dir=None, use_memmap=True,
        sliding_window=None, dimension=None, labels=None,
        augmentation=None):

        if augmentation is not None:
            msg = 'Data augmentation is not supported by `Precomputed`.'
            raise ValueError(msg)

        super(Precomputed, self).__init__()
        self.root_dir = Path(root_dir).expanduser().resolve(strict=False)
        print(self.root_dir)
        #self.root_dir= '/home/vnfm/pyannote.audio-1.0/'
        self.use_memmap = use_memmap

        # path:/home/vnfm/pyannote-audio-1.0/tutorials/feature-extraction/metadata.yml,when segmentation:train
            # dimension: 2
            # duration: 0.025
            # start: -0.0125
            # step: 0.01
        path = self.root_dir / 'metadata.yml'
        if path.exists():

            with io.open(path, 'r') as f:
                params = yaml.load(f)

            self.dimension_ = params.pop('dimension')
            self.labels_ = params.pop('labels', None)
            # SlidingWindow需要参数如:start=0.0, step=0.100, duration=0.200
            self.sliding_window_ = SlidingWindow(**params)

            if dimension is not None and self.dimension_ != dimension:
                msg = 'inconsistent "dimension" (is: {0}, should be: {1})'
                raise ValueError(msg.format(dimension, self.dimensions_))

            if labels is not None and self.labels_ != labels:
                msg = 'inconsistent "labels" (is {0}, should be: {1})'
                raise ValueError(msg.format(labels, self.labels_))

            if ((sliding_window is not None) and
                ((sliding_window.start != self.sliding_window_.start) or
                 (sliding_window.duration != self.sliding_window_.duration) or
                 (sliding_window.step != self.sliding_window_.step))):
                msg = 'inconsistent "sliding_window"'
                raise ValueError(msg)

        else:

            if sliding_window is None or dimension is None:
                msg = (
                    f'Either directory {self.root_dir} does not exist or it '
                    f'does not contain precomputed features. In case it exists '
                    f'and this was done on purpose, please provide both '
                    f'`sliding_window` and `dimension` parameters when '
                    f'instantianting `Precomputed`.')
                raise ValueError(msg)

            # create parent directory
            mkdir_p(path.parent)

            params = {'start': sliding_window.start,
                      'duration': sliding_window.duration,
                      'step': sliding_window.step,
                      'dimension': dimension}
            if labels is not None:
                params['labels'] = labels

            with io.open(path, 'w') as f:
                yaml.dump(params, f, default_flow_style=False)

            self.sliding_window_ = sliding_window
            self.dimension_ = dimension
            self.labels_ = labels

    @property
    def sliding_window(self):
        """Sliding window used for feature extraction"""
        return self.sliding_window_

    @property
    def dimension(self):
        """Dimension of feature vectors"""
        return self.dimension_

    @property
    def labels(self):
        """Human-readable label of each dimension"""
        return self.labels_

    def __call__(self, current_file):
        """Obtain features for file

        Parameters
        ----------
        current_file : dict
            `pyannote.database` files.

        Returns
        -------
        features : `pyannote.core.SlidingWindowFeature`
            Features
        """

        path = Path(self.get_path(current_file))
        #path='/home/vnfm/pyannote.audio-1.0/tutorials/feature-extraction'
        if not path.exists():
            uri = get_unique_identifier(current_file)
            msg = f'No precomputed features for "{uri}".'
            raise PyannoteFeatureExtractionError(msg)

        if self.use_memmap:
            data = np.load(str(path), mmap_mode='r')
        else:
            data = np.load(str(path))

        return SlidingWindowFeature(data, self.sliding_window_)

    def crop(self, current_file, segment, mode='center', fixed=None,return_data=True):
        """Fast version of self(current_file).crop(segment, **kwargs)

        Parameters
        ----------
        current_file : dict
            `pyannote.database` file.
        segment : `pyannote.core.Segment`
            Segment from which to extract features.

        Returns
        -------
        features : (n_frames, dimension) numpy array
            Extracted features

        See also
        --------
        `pyannote.core.SlidingWindowFeature.crop`
        """

        # match default FeatureExtraction.crop behavior
        if mode == 'center' and fixed is None:
            fixed = segment.duration

        memmap = open_memmap(self.get_path(current_file), mode='r')
        swf = SlidingWindowFeature(memmap, self.sliding_window_)
        result = swf.crop(segment, mode=mode, fixed=fixed,
                          return_data=return_data)
        del memmap
        return result

    def shape(self, item):
        """Faster version of precomputed(item).data.shape"""
        memmap = open_memmap(self.get_path(item), mode='r')
        shape = memmap.shape
        del memmap
        return shape

    def dump(self, item, features):
        path = Path(self.get_path(item))
        mkdir_p(path.parent)
        np.save(path, features.data)


class PrecomputedHTK(object):

    def __init__(self, root_dir=None, duration=0.025, step=None):
        super(PrecomputedHTK, self).__init__()
        self.root_dir = root_dir
        self.duration = duration

        # load any htk file in root_dir/database
        path = '{root_dir}/*/*.htk'.format(root_dir=root_dir)
        found = glob(path)

        # FIXME switch to Py3.5 and use glob 'recursive' parameter
        # http://stackoverflow.com/questions/2186525/
        # use-a-glob-to-find-files-recursively-in-python

        if len(found) > 0:
            file_htk = found[0]
        else:
            msg = "Could not find any HTK file in '{root_dir}'."
            raise ValueError(msg.format(root_dir=root_dir))

        X, sample_period = self.load_htk(file_htk)
        self.dimension_ = X.shape[1]
        self.step = sample_period * 1e-7

        # don't trust HTK header when 'step' is provided by the user.
        # HACK remove this when Pepe's HTK files are fixed...
        if step is not None:
            self.step = step

        self.sliding_window_ = SlidingWindow(start=0.,
                                             duration=self.duration,
                                             step=self.step)
    @property
    def sliding_window(self):
        return self.sliding_window_

    @property
    def dimension(self):
        return self.dimension_

    @staticmethod
    def get_path(root_dir, item):
        uri = get_unique_identifier(item)
        path = '{root_dir}/{uri}.htk'.format(root_dir=root_dir, uri=uri)
        return path

    # http://codereview.stackexchange.com/questions/
    # 1496/reading-a-binary-file-containing-periodic-samples
    @staticmethod
    def load_htk(file_htk):
        with open(file_htk, 'rb') as fp:
            data = fp.read(12)
            num_samples, sample_period, sample_size, _ = unpack('>iihh', data)
            num_features = int(sample_size / 4)
            num_samples = int(num_samples)
            X = np.empty((num_samples, num_features))
            for i in range(num_samples):
                data = fp.read(sample_size)
                X[i, :] = unpack('>' + ('f' * (sample_size // 4)), data)
        return X, sample_period

    def __call__(self, item):
        file_htk = self.get_path(self.root_dir, item)
        X, _ = self.load_htk(file_htk)
        return SlidingWindowFeature(X, self.sliding_window_)

In [50]:
#from pyannote.audio.embedding.generators import SpeechSegmentGenerator
#from pyannote.audio.features.with_librosa import LibrosaMFCC

#from pyannote.audio.features.precomputed import Precomputed
from pyannote.audio.embedding.approaches import TripletLoss
import yaml
from pyannote.database import get_protocol, FileFinder, get_database,get_databases
preprocessors = {'audio': FileFinder()}
protocol = get_protocol('AMI.SpeakerDiarization.MixHeadset',preprocessors=preprocessors)


with open(config_yml, 'r') as fp: 
    config = yaml.load(fp)
    
FeatureExtraction = Precomputed
print(config['feature_extraction'].get('params', {}))

feature_extraction = FeatureExtraction(
    **config['feature_extraction'].get('params', {}))

Approach = TripletLoss
task_ = Approach(**config['approach'].get('params', {}))
print(dir(task_))


            
subset='train'
#batch_generator =task_.get_batch_generator(feature_extraction, protocol, subset=subset)

#batch_generator = get_batch_generator(feature_extraction)
#batch_generator_ = batch_generator
#batches_ = batch_generator_()
#print(batches_)
#"""
batch_generator = SpeechSegmentGenerator(
                feature_extraction,
                #per_label=task_.per_label, 
                per_fold=task_.per_fold,
                per_epoch=task_.per_epoch, 
                duration=0.025,
                min_duration=task_.min_duration, 
                max_duration=task_.max_duration,
                )
#"""
batches = batch_generator(protocol)
batch = next(batches)
print(batch)
#batches_per_epoch = getattr(batch_generator, 'batches_per_epoch', None)
#print(labels)
#labels = getattr(batch_generator, 'labels', None)
#print(labels)
#label = next(labels)
#print(label)
#print(batch)
#batches = batch_generator(feature_extraction,protocol, subset=subset)

{'root_dir': 'feature-extraction'}
/home/vnfm/speaker_diarization_overlap/feature-extraction
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__metaclass__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_choose_lr', 'aggregate', 'auto_lr', 'batch_all', 'batch_easy', 'batch_hard', 'batch_loss', 'batch_negative', 'clamp', 'duration', 'extra_init', 'extra_restart', 'fit', 'fit_iter', 'forward', 'get_batch_generator', 'label_min_duration', 'margin', 'margin_', 'max_distance', 'max_duration', 'metric', 'min_duration', 'on_epoch_end', 'on_train_start', 'parallel', 'pdist', 'per_epoch', 'per_fold', 'per_label', 'sampling', 'to_numpy', 'triplet_loss', 'variant']
20
min_duration_
0.025
batch_size
60
1
batches_per_epoch
57600
['__class__', '__cont