svcore  1.9
FeatureExtractionModelTransformer.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam and QMUL.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
17 
19 #include "plugin/PluginXml.h"
20 #include <vamp-hostsdk/Plugin.h>
21 
22 #include "data/model/Model.h"
23 #include "base/Window.h"
24 #include "base/Exceptions.h"
29 #include "data/model/NoteModel.h"
31 #include "data/model/RegionModel.h"
32 #include "data/model/FFTModel.h"
35 
36 #include "TransformFactory.h"
37 
38 #include <iostream>
39 
40 #include <QSettings>
41 
// Constructor, single-transform form: store the input and transform via the
// ModelTransformer base, leave m_plugin unset, then run initialise().
// NOTE(review): the signature line was dropped by extraction — presumably
// FeatureExtractionModelTransformer::FeatureExtractionModelTransformer(Input in,
// ...); confirm against the class header.
43  const Transform &transform) :
44  ModelTransformer(in, transform),
45  m_plugin(0)
46 {
47 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
48 
49  initialise();
50 }
51 
// Constructor, multi-transform form: identical to the single-transform
// constructor except that it accepts a list of transforms (all of which must
// differ only in their choice of plugin output — enforced in initialise()).
// NOTE(review): the signature line was dropped by extraction.
53  const Transforms &transforms) :
54  ModelTransformer(in, transforms),
55  m_plugin(0)
56 {
57 // SVDEBUG << "FeatureExtractionModelTransformer::FeatureExtractionModelTransformer: plugin " << pluginId << ", outputName " << m_transform.getOutput() << endl;
58 
59  initialise();
60 }
61 
// File-local helper: returns true if two Transforms are identical in every
// respect except their chosen plugin output. It copies t2, overwrites the
// copy's output with t1's, and compares for full equality.
// NOTE(review): the line carrying the helper's name/parameters was dropped by
// extraction; the body implies a signature like
// (const Transform &t1, const Transform &t2).
62 static bool
64 {
65  Transform t2o(t2);
66  t2o.setOutput(t1.getOutput());
67  return t1 == t2o;
68 }
69 
// initialise(): validate that all supplied transforms are similar, look up
// and instantiate the Vamp plugin, negotiate channel count and step/block
// sizes, and resolve each transform's requested output name against the
// plugin's output descriptors. Returns false with m_message set (a
// user-facing, translated string) on any failure.
// NOTE(review): extraction dropped the signature line plus several interior
// lines (the similarity check in the first loop, the plugin-factory lookup,
// the getConformingInput() declaration, two TransformFactory call heads, and
// the final per-transform createOutputModel call) — flagged inline below.
70 bool
72 {
73  // All transforms must use the same plugin, parameters, and
74  // inputs: they can differ only in choice of plugin output. So we
75  // initialise based purely on the first transform in the list (but
76  // first check that they are actually similar as promised)
77 
78  for (int j = 1; j < (int)m_transforms.size(); ++j) {
// NOTE(review): missing line here — presumably
// if (!areTransformsSimilar(m_transforms[0], m_transforms[j])) {
80  m_message = tr("Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
81  return false;
82  }
83  }
84 
85  Transform primaryTransform = m_transforms[0];
86 
87  QString pluginId = primaryTransform.getPluginIdentifier();
88 
// NOTE(review): missing line(s) here — the FeatureExtractionPluginFactory
// lookup that produces `factory` was dropped by extraction.
91 
92  if (!factory) {
93  m_message = tr("No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
94  return false;
95  }
96 
// NOTE(review): missing line here — presumably
// DenseTimeValueModel *input = getConformingInput();
98  if (!input) {
99  m_message = tr("Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
100  return false;
101  }
102 
103  m_plugin = factory->instantiatePlugin(pluginId, input->getSampleRate());
104  if (!m_plugin) {
105  m_message = tr("Failed to instantiate plugin \"%1\"").arg(pluginId);
106  return false;
107  }
108 
// NOTE(review): the next two argument lines are the tails of two
// TransformFactory calls whose first lines were dropped — presumably
// setPluginParameters(...) and makeContextConsistentWithPlugin(...).
110  (primaryTransform, m_plugin);
111 
113  (primaryTransform, m_plugin);
114 
// Channel negotiation: if the plugin cannot accept all input channels,
// fall back to a single (mixed-down) channel; if it needs more than we
// can supply, fail with a descriptive message.
115  int channelCount = input->getChannelCount();
116  if ((int)m_plugin->getMaxChannelCount() < channelCount) {
117  channelCount = 1;
118  }
119  if ((int)m_plugin->getMinChannelCount() > channelCount) {
120  m_message = tr("Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
121  .arg(pluginId)
122  .arg(m_plugin->getMinChannelCount())
123  .arg(m_plugin->getMaxChannelCount())
124  .arg(input->getChannelCount());
125  return false;
126  }
127 
128  SVDEBUG << "Initialising feature extraction plugin with channels = "
129  << channelCount << ", step = " << primaryTransform.getStepSize()
130  << ", block = " << primaryTransform.getBlockSize() << endl;
131 
132  if (!m_plugin->initialise(channelCount,
133  primaryTransform.getStepSize(),
134  primaryTransform.getBlockSize())) {
135 
// First initialise failed: retry once with the plugin's own default
// step/block sizes (obtained by zeroing them and re-running the
// context-consistency call), but only if that actually changed them.
136  int pstep = primaryTransform.getStepSize();
137  int pblock = primaryTransform.getBlockSize();
138 
140  primaryTransform.setStepSize(0);
141  primaryTransform.setBlockSize(0);
// NOTE(review): missing call head above this argument line — presumably
// TransformFactory::getInstance()->makeContextConsistentWithPlugin
143  (primaryTransform, m_plugin);
144 
145  if (primaryTransform.getStepSize() != pstep ||
146  primaryTransform.getBlockSize() != pblock) {
147 
148  if (!m_plugin->initialise(channelCount,
149  primaryTransform.getStepSize(),
150  primaryTransform.getBlockSize())) {
151 
152  m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
153  return false;
154 
155  } else {
156 
// Retry succeeded: record a non-fatal warning about the substituted sizes.
157  m_message = tr("Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
158  .arg(pluginId)
159  .arg(pstep)
160  .arg(pblock)
161  .arg(primaryTransform.getStepSize())
162  .arg(primaryTransform.getBlockSize());
163  }
164 
165  } else {
166 
167  m_message = tr("Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
168  return false;
169  }
170  }
171 
// Version check is advisory only: mismatches are reported via m_message
// (prepended to any existing message) but do not abort initialisation.
172  if (primaryTransform.getPluginVersion() != "") {
173  QString pv = QString("%1").arg(m_plugin->getPluginVersion());
174  if (pv != primaryTransform.getPluginVersion()) {
175  QString vm = tr("Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
176  .arg(primaryTransform.getPluginVersion())
177  .arg(pluginId)
178  .arg(pv);
179  if (m_message != "") {
180  m_message = QString("%1; %2").arg(vm).arg(m_message);
181  } else {
182  m_message = vm;
183  }
184  }
185  }
186 
187  Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
188 
189  if (outputs.empty()) {
190  m_message = tr("Plugin \"%1\" has no outputs").arg(pluginId);
191  return false;
192  }
193 
// Resolve each transform's output name to an output index/descriptor; an
// empty output name selects the plugin's first output.
194  for (int j = 0; j < (int)m_transforms.size(); ++j) {
195 
196  for (int i = 0; i < (int)outputs.size(); ++i) {
197 // SVDEBUG << "comparing output " << i << " name \"" << outputs[i].identifier << "\" with expected \"" << m_transform.getOutput() << "\"" << endl;
198  if (m_transforms[j].getOutput() == "" ||
199  outputs[i].identifier == m_transforms[j].getOutput().toStdString()) {
200  m_outputNos.push_back(i);
201  m_descriptors.push_back(new Vamp::Plugin::OutputDescriptor(outputs[i]));
202  m_fixedRateFeatureNos.push_back(-1); // we increment before use
203  break;
204  }
205  }
206 
// If nothing was pushed for transform j, its output name matched none of
// the plugin's outputs.
207  if ((int)m_descriptors.size() <= j) {
208  m_message = tr("Plugin \"%1\" has no output named \"%2\"")
209  .arg(pluginId)
210  .arg(m_transforms[j].getOutput());
211  return false;
212  }
213  }
214 
215  for (int j = 0; j < (int)m_transforms.size(); ++j) {
// NOTE(review): the loop body was dropped by extraction — presumably
// createOutputModel(j);
217  }
218 
219  return true;
220 }
221 
// createOutputModel(n): choose and allocate the Model subclass appropriate
// for the n'th transform's output descriptor, based on bin count, sample
// type, known extents, and (API v2) duration support, then append it to
// m_outputs. NOTE(review): the signature line and the getConformingInput()
// declaration were dropped by extraction.
222 void
224 {
// NOTE(review): missing line here — presumably
// DenseTimeValueModel *input = getConformingInput();
226 
227 // cerr << "FeatureExtractionModelTransformer::createOutputModel: sample type " << m_descriptor->sampleType << ", rate " << m_descriptor->sampleRate << endl;
228 
229  PluginRDFDescription description(m_transforms[n].getPluginIdentifier());
230  QString outputId = m_transforms[n].getOutput();
231 
232  int binCount = 1;
233  float minValue = 0.0, maxValue = 0.0;
234  bool haveExtents = false;
235  bool haveBinCount = m_descriptors[n]->hasFixedBinCount;
236 
237  if (haveBinCount) {
238  binCount = m_descriptors[n]->binCount;
239  }
240 
241  m_needAdditionalModels[n] = false;
242 
243 // cerr << "FeatureExtractionModelTransformer: output bin count "
244 // << binCount << endl;
245 
246  if (binCount > 0 && m_descriptors[n]->hasKnownExtents) {
247  minValue = m_descriptors[n]->minValue;
248  maxValue = m_descriptors[n]->maxValue;
249  haveExtents = true;
250  }
251 
252  int modelRate = input->getSampleRate();
253  int modelResolution = 1;
254 
// Warn (stderr only) if the plugin claims an output rate finer than the
// input audio rate — we cannot display features at that resolution.
255  if (m_descriptors[n]->sampleType !=
256  Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
257  if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
258  cerr << "WARNING: plugin reports output sample rate as "
259  << m_descriptors[n]->sampleRate << " (can't display features with finer resolution than the input rate of " << input->getSampleRate() << ")" << endl;
260  }
261  }
262 
// Derive the model's frame resolution from the output's sample type.
263  switch (m_descriptors[n]->sampleType) {
264 
265  case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
266  if (m_descriptors[n]->sampleRate != 0.0) {
267  modelResolution = int(modelRate / m_descriptors[n]->sampleRate + 0.001);
268  }
269  break;
270 
271  case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
272  modelResolution = m_transforms[n].getStepSize();
273  break;
274 
275  case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
// NOTE(review): lines 276-280 of the original were dropped by extraction
// (likely a comment and/or a zero-rate guard) — confirm against upstream.
281  if (m_descriptors[n]->sampleRate > input->getSampleRate()) {
282  modelResolution = 1;
283  } else {
284  modelResolution = int(round(input->getSampleRate() /
285  m_descriptors[n]->sampleRate));
286  }
287  break;
288  }
289 
// Vamp API v1 plugins cannot report feature durations; this changes which
// model types we can infer below.
290  bool preDurationPlugin = (m_plugin->getVampApiVersion() < 2);
291 
292  Model *out = 0;
293 
294  if (binCount == 0 &&
295  (preDurationPlugin || !m_descriptors[n]->hasDuration)) {
296 
297  // Anything with no value and no duration is an instant
298 
299  out = new SparseOneDimensionalModel(modelRate, modelResolution, false);
300  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
301  out->setRDFTypeURI(outputEventTypeURI);
302 
303  } else if ((preDurationPlugin && binCount > 1 &&
304  (m_descriptors[n]->sampleType ==
305  Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
306  (!preDurationPlugin && m_descriptors[n]->hasDuration)) {
307 
308  // For plugins using the old v1 API without explicit duration,
309  // we treat anything that has multiple bins (i.e. that has the
310  // potential to have value and duration) and a variable sample
311  // rate as a note model, taking its values as pitch, duration
312  // and velocity (if present) respectively. This is the same
313  // behaviour as always applied by SV to these plugins in the
314  // past.
315 
316  // For plugins with the newer API, we treat anything with
317  // duration as either a note model with pitch and velocity, or
318  // a region model.
319 
320  // How do we know whether it's an interval or note model?
321  // What's the essential difference? Is a note model any
322  // interval model using a Hz or "MIDI pitch" scale? There
323  // isn't really a reliable test for "MIDI pitch"... Does a
324  // note model always have velocity? This is a good question
325  // to be addressed by accompanying RDF, but for the moment we
326  // will do the following...
327 
328  bool isNoteModel = false;
329 
330  // Regions have only value (and duration -- we can't extract a
331  // region model from an old-style plugin that doesn't support
332  // duration)
333  if (binCount > 1) isNoteModel = true;
334 
335  // Regions do not have units of Hz or MIDI things (a sweeping
336  // assumption!)
337  if (m_descriptors[n]->unit == "Hz" ||
338  m_descriptors[n]->unit.find("MIDI") != std::string::npos ||
339  m_descriptors[n]->unit.find("midi") != std::string::npos) {
340  isNoteModel = true;
341  }
342 
343  // If we had a "sparse 3D model", we would have the additional
344  // problem of determining whether to use that here (if bin
345  // count > 1). But we don't.
346 
// User preference: an experimental "flexi" note model variant can be
// selected via QSettings ("Transformer/use-flexi-note-model").
347  QSettings settings;
348  settings.beginGroup("Transformer");
349  bool flexi = settings.value("use-flexi-note-model", false).toBool();
350  settings.endGroup();
351 
352  cerr << "flexi = " << flexi << endl;
353 
354  if (isNoteModel && !flexi) {
355 
356  NoteModel *model;
357  if (haveExtents) {
358  model = new NoteModel
359  (modelRate, modelResolution, minValue, maxValue, false);
360  } else {
361  model = new NoteModel
362  (modelRate, modelResolution, false);
363  }
364  model->setScaleUnits(m_descriptors[n]->unit.c_str());
365  out = model;
366 
367  } else if (isNoteModel && flexi) {
368 
369  FlexiNoteModel *model;
370  if (haveExtents) {
371  model = new FlexiNoteModel
372  (modelRate, modelResolution, minValue, maxValue, false);
373  } else {
374  model = new FlexiNoteModel
375  (modelRate, modelResolution, false);
376  }
377  model->setScaleUnits(m_descriptors[n]->unit.c_str());
378  out = model;
379 
380  } else {
381 
382  RegionModel *model;
383  if (haveExtents) {
384  model = new RegionModel
385  (modelRate, modelResolution, minValue, maxValue, false);
386  } else {
387  model = new RegionModel
388  (modelRate, modelResolution, false);
389  }
390  model->setScaleUnits(m_descriptors[n]->unit.c_str());
391  out = model;
392  }
393 
394  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
395  out->setRDFTypeURI(outputEventTypeURI);
396 
397  } else if (binCount == 1 ||
398  (m_descriptors[n]->sampleType ==
399  Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
400 
401  // Anything that is not a 1D, note, or interval model and that
402  // has only one value per result must be a sparse time value
403  // model.
404 
405  // Anything that is not a 1D, note, or interval model and that
406  // has a variable sample rate is treated as a set of sparse
407  // time value models, one per output bin, because we lack a
408  // sparse 3D model.
409 
410  // Anything that is not a 1D, note, or interval model and that
411  // has a fixed sample rate but an unknown number of values per
412  // result is also treated as a set of sparse time value models.
413 
414  // For sets of sparse time value models, we create a single
415  // model first as the "standard" output and then create models
416  // for bins 1+ in the additional model map (mapping the output
417  // descriptor to a list of models indexed by bin-1). But we
418  // don't create the additional models yet, as this case has to
419  // work even if the number of bins is unknown at this point --
420  // we create an additional model (copying its parameters from
421  // the default one) each time a new bin is encountered.
422 
423  if (!haveBinCount || binCount > 1) {
424  m_needAdditionalModels[n] = true;
425  }
426 
427  SparseTimeValueModel *model;
428  if (haveExtents) {
429  model = new SparseTimeValueModel
430  (modelRate, modelResolution, minValue, maxValue, false);
431  } else {
432  model = new SparseTimeValueModel
433  (modelRate, modelResolution, false);
434  }
435 
436  Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();
437  model->setScaleUnits(outputs[m_outputNos[n]].unit.c_str());
438 
439  out = model;
440 
441  QString outputEventTypeURI = description.getOutputEventTypeURI(outputId);
442  out->setRDFTypeURI(outputEventTypeURI);
443 
444  } else {
445 
446  // Anything that is not a 1D, note, or interval model and that
447  // has a fixed sample rate and more than one value per result
448  // must be a dense 3D model.
449 
// NOTE(review): missing lines here — presumably the
// EditableDenseThreeDimensionalModel *model = new ... declaration whose
// argument lines follow.
452  (modelRate, modelResolution, binCount,
454  false);
455 
456  if (!m_descriptors[n]->binNames.empty()) {
457  std::vector<QString> names;
458  for (int i = 0; i < (int)m_descriptors[n]->binNames.size(); ++i) {
459  names.push_back(m_descriptors[n]->binNames[i].c_str());
460  }
461  model->setBinNames(names);
462  }
463 
464  out = model;
465 
466  QString outputSignalTypeURI = description.getOutputSignalTypeURI(outputId);
467  out->setRDFTypeURI(outputSignalTypeURI);
468  }
469 
470  if (out) {
471  out->setSourceModel(input);
472  m_outputs.push_back(out);
473  }
474 }
475 
// Destructor: release the plugin instance and the per-transform output
// descriptor copies allocated in initialise(). The output models themselves
// are not deleted here — ownership of m_outputs passes elsewhere.
// NOTE(review): the destructor's signature line was dropped by extraction.
477 {
478 // SVDEBUG << "FeatureExtractionModelTransformer::~FeatureExtractionModelTransformer()" << endl;
479  delete m_plugin;
480  for (int j = 0; j < (int)m_descriptors.size(); ++j) {
481  delete m_descriptors[j];
482  }
483 }
484 
// getAdditionalOutputModels(): flatten the two-level additional-model map
// (transform index -> bin number -> model) into a single Models list,
// skipping null entries. NOTE(review): signature line dropped by extraction;
// returns the collected Models container by value.
487 {
488  Models mm;
489  for (AdditionalModelMap::iterator i = m_additionalModels.begin();
490  i != m_additionalModels.end(); ++i) {
491  for (std::map<int, SparseTimeValueModel *>::iterator j =
492  i->second.begin();
493  j != i->second.end(); ++j) {
494  SparseTimeValueModel *m = j->second;
495  if (m) mm.push_back(m);
496  }
497  }
498  return mm;
499 }
500 
// willHaveAdditionalOutputModels(): true if any transform was flagged in
// createOutputModel() as needing per-bin additional models.
// NOTE(review): the signature line was dropped by extraction.
501 bool
503 {
504  for (std::map<int, bool>::const_iterator i =
505  m_needAdditionalModels.begin();
506  i != m_needAdditionalModels.end(); ++i) {
507  if (i->second) return true;
508  }
509  return false;
510 }
511 
// getAdditionalModel(n, binNo): lazily create (and cache) the extra
// SparseTimeValueModel used for bin binNo (>= 1) of transform n, cloning
// rate/resolution/extents/units/RDF type from the primary output model.
// Returns 0 for binNo 0, for transforms that need no additional models, or
// when the primary output is not a SparseTimeValueModel.
// NOTE(review): the signature line was dropped by extraction.
514 {
515 // std::cerr << "getAdditionalModel(" << n << ", " << binNo << ")" << std::endl;
516 
517  if (binNo == 0) {
518  std::cerr << "Internal error: binNo == 0 in getAdditionalModel (should be using primary model)" << std::endl;
519  return 0;
520  }
521 
522  if (!m_needAdditionalModels[n]) return 0;
523  if (!isOutput<SparseTimeValueModel>(n)) return 0;
// Cache hit: a model for this (n, binNo) pair already exists.
524  if (m_additionalModels[n][binNo]) return m_additionalModels[n][binNo];
525 
526  std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): creating" << std::endl;
527 
528  SparseTimeValueModel *baseModel = getConformingOutput<SparseTimeValueModel>(n);
529  if (!baseModel) return 0;
530 
531  std::cerr << "getAdditionalModel(" << n << ", " << binNo << "): (from " << baseModel << ")" << std::endl;
532 
533  SparseTimeValueModel *additional =
534  new SparseTimeValueModel(baseModel->getSampleRate(),
535  baseModel->getResolution(),
536  baseModel->getValueMinimum(),
537  baseModel->getValueMaximum(),
538  false);
539 
540  additional->setScaleUnits(baseModel->getScaleUnits());
541  additional->setRDFTypeURI(baseModel->getRDFTypeURI());
542 
543  m_additionalModels[n][binNo] = additional;
544  return additional;
545 }
546 
// getConformingInput(): return the input model downcast to
// DenseTimeValueModel, or 0 (with a debug warning) if the input is not of
// that type. NOTE(review): the signature line was dropped by extraction.
549 {
550 // SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: input model is " << getInputModel() << endl;
551 
552  DenseTimeValueModel *dtvm =
553  dynamic_cast<DenseTimeValueModel *>(getInputModel());
554  if (!dtvm) {
555  SVDEBUG << "FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
556  }
557  return dtvm;
558 }
559 
// run(): the processing thread body. Waits for the input model to become
// ready, allocates per-channel sample buffers (and FFT models for
// frequency-domain plugins), then steps through the configured time context
// feeding blocks to the plugin, routing returned features to the output
// models via addFeature() and reporting progress via setCompletion().
// NOTE(review): the signature line and the getConformingInput() declaration
// were dropped by extraction.
560 void
562 {
// NOTE(review): missing line here — presumably
// DenseTimeValueModel *input = getConformingInput();
564  if (!input) return;
565 
566  if (m_outputs.empty()) return;
567 
568  Transform primaryTransform = m_transforms[0];
569 
// Poll (0.5 s interval) until the input model has data, unless abandoned.
570  while (!input->isReady() && !m_abandoned) {
571  cerr << "FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
572  usleep(500000);
573  }
574  if (m_abandoned) return;
575 
576  int sampleRate = input->getSampleRate();
577 
// Mirror the channel fallback made in initialise(): mix down to one
// channel if the plugin cannot take them all.
578  int channelCount = input->getChannelCount();
579  if ((int)m_plugin->getMaxChannelCount() < channelCount) {
580  channelCount = 1;
581  }
582 
// +2 floats of slack per buffer: frequency-domain blocks hold interleaved
// real/imag pairs for blockSize/2 + 1 bins.
583  float **buffers = new float*[channelCount];
584  for (int ch = 0; ch < channelCount; ++ch) {
585  buffers[ch] = new float[primaryTransform.getBlockSize() + 2];
586  }
587 
588  int stepSize = primaryTransform.getStepSize();
589  int blockSize = primaryTransform.getBlockSize();
590 
591  bool frequencyDomain = (m_plugin->getInputDomain() ==
592  Vamp::Plugin::FrequencyDomain);
593  std::vector<FFTModel *> fftModels;
594 
595  if (frequencyDomain) {
596  for (int ch = 0; ch < channelCount; ++ch) {
597  FFTModel *model = new FFTModel
// NOTE(review): the FFTModel constructor's first argument line (598) was
// dropped by extraction — presumably the conformable input model.
599  channelCount == 1 ? m_input.getChannel() : ch,
600  primaryTransform.getWindowType(),
601  blockSize,
602  stepSize,
603  blockSize,
604  false,
// NOTE(review): line 605 (the trailing constructor argument) was also
// dropped by extraction.
606  if (!model->isOK()) {
607  delete model;
608  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
609  setCompletion(j, 100);
610  }
// NOTE(review): line 611 (likely a comment or abandon flag) was dropped.
612  throw AllocationFailed("Failed to create the FFT model for this feature extraction model transformer");
613  }
614  model->resume();
615  fftModels.push_back(model);
616  }
617  }
618 
619  long startFrame = m_input.getModel()->getStartFrame();
620  long endFrame = m_input.getModel()->getEndFrame();
621 
// Clamp the transform's optional start/duration context to the extent of
// the input model.
622  RealTime contextStartRT = primaryTransform.getStartTime();
623  RealTime contextDurationRT = primaryTransform.getDuration();
624 
625  long contextStart =
626  RealTime::realTime2Frame(contextStartRT, sampleRate);
627 
628  long contextDuration =
629  RealTime::realTime2Frame(contextDurationRT, sampleRate);
630 
631  if (contextStart == 0 || contextStart < startFrame) {
632  contextStart = startFrame;
633  }
634 
635  if (contextDuration == 0) {
636  contextDuration = endFrame - contextStart;
637  }
638  if (contextStart + contextDuration > endFrame) {
639  contextDuration = endFrame - contextStart;
640  }
641 
642  long blockFrame = contextStart;
643 
644  long prevCompletion = 0;
645 
646  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
647  setCompletion(j, 0);
648  }
649 
650  float *reals = 0;
651  float *imaginaries = 0;
652  if (frequencyDomain) {
653  reals = new float[blockSize/2 + 1];
654  imaginaries = new float[blockSize/2 + 1];
655  }
656 
657  QString error = "";
658 
// Main processing loop: one plugin process() call per hop of stepSize.
659  while (!m_abandoned) {
660 
// Frequency-domain timestamps refer to block centres, hence the
// half-block allowance before stopping.
661  if (frequencyDomain) {
662  if (blockFrame - int(blockSize)/2 >
663  contextStart + contextDuration) break;
664  } else {
665  if (blockFrame >=
666  contextStart + contextDuration) break;
667  }
668 
669 // SVDEBUG << "FeatureExtractionModelTransformer::run: blockFrame "
670 // << blockFrame << ", endFrame " << endFrame << ", blockSize "
671 // << blockSize << endl;
672 
// Completion capped at 99 inside the loop; 100 is set after the run.
673  long completion =
674  (((blockFrame - contextStart) / stepSize) * 99) /
675  (contextDuration / stepSize + 1);
676 
677  // channelCount is either m_input.getModel()->channelCount or 1
678 
679  if (frequencyDomain) {
680  for (int ch = 0; ch < channelCount; ++ch) {
681  int column = (blockFrame - startFrame) / stepSize;
682  fftModels[ch]->getValuesAt(column, reals, imaginaries);
// Interleave real/imag pairs as the Vamp frequency-domain input format.
683  for (int i = 0; i <= blockSize/2; ++i) {
684  buffers[ch][i*2] = reals[i];
685  buffers[ch][i*2+1] = imaginaries[i];
686  }
687  error = fftModels[ch]->getError();
688  if (error != "") {
689  cerr << "FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
690  m_abandoned = true;
691  m_message = error;
692  }
693  }
694  } else {
695  getFrames(channelCount, blockFrame, blockSize, buffers);
696  }
697 
698  if (m_abandoned) break;
699 
700  Vamp::Plugin::FeatureSet features = m_plugin->process
701  (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
702 
703  if (m_abandoned) break;
704 
705  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
706  for (int fi = 0; fi < (int)features[m_outputNos[j]].size(); ++fi) {
707  Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
708  addFeature(j, blockFrame, feature);
709  }
710  }
711 
712  if (blockFrame == contextStart || completion > prevCompletion) {
713  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
714  setCompletion(j, completion);
715  }
716  prevCompletion = completion;
717  }
718 
719  blockFrame += stepSize;
720  }
721 
// Collect whatever the plugin still holds after the final block.
722  if (!m_abandoned) {
723  Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();
724 
725  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
726  for (int fi = 0; fi < (int)features[m_outputNos[j]].size(); ++fi) {
727  Vamp::Plugin::Feature feature = features[m_outputNos[j]][fi];
728  addFeature(j, blockFrame, feature);
729  }
730  }
731  }
732 
733  for (int j = 0; j < (int)m_outputNos.size(); ++j) {
734  setCompletion(j, 100);
735  }
736 
// Manual cleanup of all raw allocations made above.
737  if (frequencyDomain) {
738  for (int ch = 0; ch < channelCount; ++ch) {
739  delete fftModels[ch];
740  }
741  delete[] reals;
742  delete[] imaginaries;
743  }
744 
745  for (int ch = 0; ch < channelCount; ++ch) {
746  delete[] buffers[ch];
747  }
748  delete[] buffers;
749 }
750 
// getFrames(channelCount, startFrame, size, buffers): fill the per-channel
// buffers with `size` samples starting at `startFrame` from the input model.
// Frames before the model start are zero-padded; a short read at the end is
// zero-filled. In the single-channel case with channel -1 the model's sum of
// channels is rescaled to a mean for use as plugin input.
// NOTE(review): the first line of the signature was dropped by extraction
// (the visible tail shows the remaining parameters).
751 void
753  long startFrame, long size,
754  float **buffers)
755 {
756  long offset = 0;
757 
// Negative startFrame: zero the leading region and shift the read window.
758  if (startFrame < 0) {
759  for (int c = 0; c < channelCount; ++c) {
760  for (int i = 0; i < size && startFrame + i < 0; ++i) {
761  buffers[c][i] = 0.0f;
762  }
763  }
764  offset = -startFrame;
765  size -= offset;
766  if (size <= 0) return;
767  startFrame = 0;
768  }
769 
// NOTE(review): missing line here — presumably
// DenseTimeValueModel *input = getConformingInput();
771  if (!input) return;
772 
773  long got = 0;
774 
775  if (channelCount == 1) {
776 
777  got = input->getData(m_input.getChannel(), startFrame, size,
778  buffers[0] + offset);
779 
780  if (m_input.getChannel() == -1 && input->getChannelCount() > 1) {
781  // use mean instead of sum, as plugin input
782  float cc = float(input->getChannelCount());
783  for (long i = 0; i < size; ++i) {
784  buffers[0][i + offset] /= cc;
785  }
786  }
787 
788  } else {
789 
// Multi-channel read: if we had to shift for a negative start, build a
// temporary array of offset pointers for the bulk getData call.
790  float **writebuf = buffers;
791  if (offset > 0) {
792  writebuf = new float *[channelCount];
793  for (int i = 0; i < channelCount; ++i) {
794  writebuf[i] = buffers[i] + offset;
795  }
796  }
797 
798  got = input->getData(0, channelCount-1, startFrame, size, writebuf);
799 
800  if (writebuf != buffers) delete[] writebuf;
801  }
802 
// Zero-fill any tail the model could not supply.
803  while (got < size) {
804  for (int c = 0; c < channelCount; ++c) {
805  buffers[c][got + offset] = 0.0;
806  }
807  ++got;
808  }
809 }
810 
// addFeature(n, blockFrame, feature): convert one returned Vamp feature into
// a point in the n'th output model. First computes the feature's frame
// position according to the output's sample type (block-aligned, timestamped,
// or fixed-rate counter), then dispatches on the concrete model type chosen
// in createOutputModel(). Features with negative frames are dropped.
// NOTE(review): the first signature line and a few interior lines
// (the fixed-rate counter increments, some declarations) were dropped by
// extraction — flagged inline below.
811 void
813  int blockFrame,
814  const Vamp::Plugin::Feature &feature)
815 {
816  int inputRate = m_input.getModel()->getSampleRate();
817 
818 // cerr << "FeatureExtractionModelTransformer::addFeature: blockFrame = "
819 // << blockFrame << ", hasTimestamp = " << feature.hasTimestamp
820 // << ", timestamp = " << feature.timestamp << ", hasDuration = "
821 // << feature.hasDuration << ", duration = " << feature.duration
822 // << endl;
823 
824  int frame = blockFrame;
825 
826  if (m_descriptors[n]->sampleType ==
827  Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
828 
829  if (!feature.hasTimestamp) {
830  cerr
831  << "WARNING: FeatureExtractionModelTransformer::addFeature: "
832  << "Feature has variable sample rate but no timestamp!"
833  << endl;
834  return;
835  } else {
836  frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
837  }
838 
839  } else if (m_descriptors[n]->sampleType ==
840  Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
841 
// Fixed-rate outputs number their features with m_fixedRateFeatureNos[n];
// a timestamped feature resynchronises the counter to its timestamp.
842  if (!feature.hasTimestamp) {
// NOTE(review): missing line here — presumably ++m_fixedRateFeatureNos[n];
844  } else {
845  RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
// NOTE(review): missing line head here — presumably
// m_fixedRateFeatureNos[n] =
847  lrint(ts.toDouble() * m_descriptors[n]->sampleRate);
848  }
849 
850 // cerr << "m_fixedRateFeatureNo = " << m_fixedRateFeatureNo
851 // << ", m_descriptor->sampleRate = " << m_descriptor->sampleRate
852 // << ", inputRate = " << inputRate
853 // << " giving frame = ";
854  frame = lrintf((m_fixedRateFeatureNos[n] / m_descriptors[n]->sampleRate)
855  * int(inputRate));
856  }
857 
858  if (frame < 0) {
859  cerr
860  << "WARNING: FeatureExtractionModelTransformer::addFeature: "
861  << "Negative frame counts are not supported (frame = " << frame
862  << " from timestamp " << feature.timestamp
863  << "), dropping feature"
864  << endl;
865  return;
866  }
867 
868  // Rather than repeat the complicated tests from the constructor
869  // to determine what sort of model we must be adding the features
870  // to, we instead test what sort of model the constructor decided
871  // to create.
872 
873  if (isOutput<SparseOneDimensionalModel>(n)) {
874 
// NOTE(review): missing line here — presumably
// SparseOneDimensionalModel *model =
876  getConformingOutput<SparseOneDimensionalModel>(n);
877  if (!model) return;
878 
// NOTE(review): missing call head here — presumably
// model->addPoint(SparseOneDimensionalModel::Point
880  (frame, feature.label.c_str()));
881 
882  } else if (isOutput<SparseTimeValueModel>(n)) {
883 
884  SparseTimeValueModel *model =
885  getConformingOutput<SparseTimeValueModel>(n);
886  if (!model) return;
887 
// One point per value; bins beyond the first go to the lazily-created
// additional models when m_needAdditionalModels[n] is set.
888  for (int i = 0; i < (int)feature.values.size(); ++i) {
889 
890  float value = feature.values[i];
891 
892  QString label = feature.label.c_str();
893  if (feature.values.size() > 1) {
894  label = QString("[%1] %2").arg(i+1).arg(label);
895  }
896 
897  SparseTimeValueModel *targetModel = model;
898 
899  if (m_needAdditionalModels[n] && i > 0) {
900  targetModel = getAdditionalModel(n, i);
901  if (!targetModel) targetModel = model;
902 // std::cerr << "adding point to model " << targetModel
903 // << " for output " << n << " bin " << i << std::endl;
904  }
905 
906  targetModel->addPoint
907  (SparseTimeValueModel::Point(frame, value, label));
908  }
909 
910  } else if (isOutput<FlexiNoteModel>(n) || isOutput<NoteModel>(n) || isOutput<RegionModel>(n)) { //GF: Added Note Model
911 
// Interval-style models: values are consumed positionally as
// pitch/value, then duration (if not explicit), then velocity.
912  int index = 0;
913 
914  float value = 0.0;
915  if ((int)feature.values.size() > index) {
916  value = feature.values[index++];
917  }
918 
919  float duration = 1;
920  if (feature.hasDuration) {
921  duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
922  } else {
923  if ((int)feature.values.size() > index) {
924  duration = feature.values[index++];
925  }
926  }
927 
928  if (isOutput<FlexiNoteModel>(n)) { // GF: added for flexi note model
929 
// Velocity defaults to 100 and is clamped into [0, 127] (negative
// values are treated as full velocity), then normalised to 0..1.
930  float velocity = 100;
931  if ((int)feature.values.size() > index) {
932  velocity = feature.values[index++];
933  }
934  if (velocity < 0) velocity = 127;
935  if (velocity > 127) velocity = 127;
936 
937  FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
938  if (!model) return;
939  model->addPoint(FlexiNoteModel::Point(frame, value, // value is pitch
940  lrintf(duration),
941  velocity / 127.f,
942  feature.label.c_str()));
943  // GF: end -- added for flexi note model
944  } else if (isOutput<NoteModel>(n)) {
945 
946  float velocity = 100;
947  if ((int)feature.values.size() > index) {
948  velocity = feature.values[index++];
949  }
950  if (velocity < 0) velocity = 127;
951  if (velocity > 127) velocity = 127;
952 
953  NoteModel *model = getConformingOutput<NoteModel>(n);
954  if (!model) return;
955  model->addPoint(NoteModel::Point(frame, value, // value is pitch
956  lrintf(duration),
957  velocity / 127.f,
958  feature.label.c_str()));
959  } else {
960 
961  RegionModel *model = getConformingOutput<RegionModel>(n);
962  if (!model) return;
963 
// A region feature with duration and several values yields one region
// per value (bin-indexed labels); otherwise a single region.
964  if (feature.hasDuration && !feature.values.empty()) {
965 
966  for (int i = 0; i < (int)feature.values.size(); ++i) {
967 
968  float value = feature.values[i];
969 
970  QString label = feature.label.c_str();
971  if (feature.values.size() > 1) {
972  label = QString("[%1] %2").arg(i+1).arg(label);
973  }
974 
975  model->addPoint(RegionModel::Point(frame, value,
976  lrintf(duration),
977  label));
978  }
979  } else {
980 
981  model->addPoint(RegionModel::Point(frame, value,
982  lrintf(duration),
983  feature.label.c_str()));
984  }
985  }
986 
987  } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
988 
// NOTE(review): missing declaration head here — presumably
// DenseThreeDimensionalModel::Column values =
990  DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
991 
// NOTE(review): missing declaration head here — presumably
// EditableDenseThreeDimensionalModel *model =
993  getConformingOutput<EditableDenseThreeDimensionalModel>(n);
994  if (!model) return;
995 
996 // cerr << "(note: model resolution = " << model->getResolution() << ")"
997 // << endl;
998 
999  if (!feature.hasTimestamp && m_fixedRateFeatureNos[n] >= 0) {
1000  model->setColumn(m_fixedRateFeatureNos[n], values);
1001  } else {
1002  model->setColumn(frame / model->getResolution(), values);
1003  }
1004 
1005  } else {
1006  SVDEBUG << "FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
1007  }
1008 }
1009 
// setCompletion(n, completion): forward a completion percentage to the n'th
// output model, dispatching on its concrete type (same type-test approach as
// addFeature), and abandon the whole run if that model is itself abandoning.
// NOTE(review): the signature line was dropped by extraction.
1010 void
1012 {
1013 // SVDEBUG << "FeatureExtractionModelTransformer::setCompletion("
1014 // << completion << ")" << endl;
1015 
1016  if (isOutput<SparseOneDimensionalModel>(n)) {
1017 
1018  SparseOneDimensionalModel *model =
1019  getConformingOutput<SparseOneDimensionalModel>(n);
1020  if (!model) return;
1021  if (model->isAbandoning()) abandon();
1022  model->setCompletion(completion, true);
1023 
1024  } else if (isOutput<SparseTimeValueModel>(n)) {
1025 
1026  SparseTimeValueModel *model =
1027  getConformingOutput<SparseTimeValueModel>(n);
1028  if (!model) return;
1029  if (model->isAbandoning()) abandon();
1030  model->setCompletion(completion, true);
1031 
1032  } else if (isOutput<NoteModel>(n)) {
1033 
1034  NoteModel *model = getConformingOutput<NoteModel>(n);
1035  if (!model) return;
1036  if (model->isAbandoning()) abandon();
1037  model->setCompletion(completion, true);
1038 
1039  } else if (isOutput<FlexiNoteModel>(n)) {
1040 
1041  FlexiNoteModel *model = getConformingOutput<FlexiNoteModel>(n);
1042  if (!model) return;
1043  if (model->isAbandoning()) abandon();
1044  model->setCompletion(completion, true);
1045 
1046  } else if (isOutput<RegionModel>(n)) {
1047 
1048  RegionModel *model = getConformingOutput<RegionModel>(n);
1049  if (!model) return;
1050  if (model->isAbandoning()) abandon();
1051  model->setCompletion(completion, true);
1052 
1053  } else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
1054 
// NOTE(review): missing declaration head here — presumably
// EditableDenseThreeDimensionalModel *model =
1056  getConformingOutput<EditableDenseThreeDimensionalModel>(n);
1057  if (!model) return;
1058  if (model->isAbandoning()) abandon();
1059  model->setCompletion(completion, true);
1060  }
1061 }
1062 
void abandon()
Hint to the processing thread that it should give up, for example because the process is going to exit.
virtual int getChannelCount() const =0
Return the number of distinct channels for this model.
QString getPluginVersion() const
Definition: Transform.cpp:287
virtual Vamp::Plugin * instantiatePlugin(QString identifier, float inputSampleRate)
bool willHaveAdditionalOutputModels()
Return true if the current transform is one that may produce additional models (to be retrieved through getAdditionalOutputModels).
Model * getModel() const
std::vector< Transform > Transforms
Definition: Transform.h:200
virtual float getValueMaximum() const
void setRDFTypeURI(QString uri)
Set the event, feature, or signal type URI for the features contained in this model,...
Definition: Model.h:223
virtual float getValueMinimum() const
virtual int getSampleRate() const
Return the frame rate in frames per second.
Definition: SparseModel.h:53
WindowType getWindowType() const
Definition: Transform.cpp:347
NoteModel – a concrete IntervalModel for notes.
Definition: NoteModel.h:38
RegionModel – a concrete IntervalModel for intervals associated with a value, which we call regions f...
Definition: RegionModel.h:36
FlexiNoteModel – a concrete IntervalModel for notes.
virtual int getStartFrame() const =0
Return the first audio frame spanned by the model.
An implementation of DenseThreeDimensionalModel that makes FFT data derived from a DenseTimeValueModel.
Definition: FFTModel.h:33
SparseTimeValueModel * getAdditionalModel(int transformNo, int binNo)
void makeContextConsistentWithPlugin(Transform &transform, Vamp::PluginBase *plugin)
If the given Transform object has no processing step and block sizes set, set them to appropriate defaults.
virtual int getData(int channel, int start, int count, float *buffer) const =0
Get the specified set of samples from the given channel of the model in single-precision floating-point format.
virtual void addPoint(const PointType &point)
Add a point.
Definition: SparseModel.h:704
void getFrames(int channelCount, long startFrame, long size, float **buffer)
virtual void setCompletion(int completion, bool update=true)
QString getOutputSignalTypeURI(QString outputId) const
Models getAdditionalOutputModels()
Return any additional models that were created during processing.
virtual bool isReady(int *completion=0) const
Return true if the model has finished loading or calculating all its data, for a model that is capable of calculating in a background thread.
Definition: Model.h:142
std::vector< Model * > Models
QString getOutput() const
Definition: Transform.cpp:224
static TransformFactory * getInstance()
Transforms m_transforms
static bool areTransformsSimilar(const Transform &t1, const Transform &t2)
Time/value point type for use in a SparseModel or SparseValueModel.
virtual QString getScaleUnits() const
QString getOutputEventTypeURI(QString outputId) const
void setOutput(QString output)
Definition: Transform.cpp:236
virtual void setSourceModel(Model *model)
Set the source model for this model.
Definition: Model.cpp:42
void setPluginParameters(const Transform &transform, Vamp::PluginBase *plugin)
Set the parameters, program and configuration strings on the given plugin from the given Transform ob...
virtual void setScaleUnits(QString units)
virtual void addPoint(const Point &point)
Add a point.
Definition: RegionModel.h:206
FeatureExtractionModelTransformer(Input input, const Transform &transform)
RealTime getStartTime() const
Definition: Transform.cpp:359
virtual void addPoint(const PointType &point)
Add a point.
virtual int getSampleRate() const =0
Return the frame rate in frames per second.
virtual void setBinNames(std::vector< QString > names)
Set the names of all bins.
void addFeature(int n, int blockFrame, const Vamp::Plugin::Feature &feature)
virtual int getResolution() const
Return the number of sample frames covered by each set of bins.
Model is the base class for all data models that represent any sort of data on a time scale based on an audio frame rate.
Definition: Model.h:35
int getStepSize() const
Definition: Transform.cpp:323
virtual int getResolution() const
Definition: SparseModel.h:62
virtual void resume()
Definition: FFTModel.h:186
void setBlockSize(int s)
Definition: Transform.cpp:341
#define SVDEBUG
Definition: Debug.h:42
Base class for models containing dense two-dimensional data (value against time).
virtual bool isOK() const
Return true if the model was constructed successfully.
Definition: FFTModel.h:120
virtual int getEndFrame() const =0
Return the last audio frame spanned by the model.
static long realTime2Frame(const RealTime &r, unsigned int sampleRate)
Convert a RealTime into a sample frame at the given sample rate.
Definition: RealTime.cpp:442
std::vector< Vamp::Plugin::OutputDescriptor * > m_descriptors
double toDouble() const
Definition: RealTime.cpp:159
A ModelTransformer turns one data model into another.
virtual void setCompletion(int completion, bool update=true)
Definition: SparseModel.h:761
QString getRDFTypeURI() const
Retrieve the event, feature, or signal type URI for the features contained in this model,...
Definition: Model.h:230
int getBlockSize() const
Definition: Transform.cpp:335
Model * getInputModel()
Return the input model for the transform.
QString getPluginIdentifier() const
Definition: Transform.cpp:218
virtual void setColumn(int x, const Column &values)
Set the entire set of bin values at the given column.
void setStepSize(int s)
Definition: Transform.cpp:329
static FeatureExtractionPluginFactory * instanceFor(QString identifier)
virtual bool isAbandoning() const
Query whether the model has been marked as abandoning.
Definition: Model.h:125
RealTime represents time values to nanosecond precision with accurate arithmetic and frame-rate conversion.
Definition: RealTime.h:35
RealTime getDuration() const
Definition: Transform.cpp:371