20 #include <vamp-hostsdk/Plugin.h> 80 m_message = tr(
"Transforms supplied to a single FeatureExtractionModelTransformer instance must be similar in every respect except plugin output");
93 m_message = tr(
"No factory available for feature extraction plugin id \"%1\" (unknown plugin type, or internal error?)").arg(pluginId);
99 m_message = tr(
"Input model for feature extraction plugin \"%1\" is of wrong type (internal error?)").arg(pluginId);
105 m_message = tr(
"Failed to instantiate plugin \"%1\"").arg(pluginId);
116 if ((
int)
m_plugin->getMaxChannelCount() < channelCount) {
119 if ((
int)
m_plugin->getMinChannelCount() > channelCount) {
120 m_message = tr(
"Cannot provide enough channels to feature extraction plugin \"%1\" (plugin min is %2, max %3; input model has %4)")
122 .arg(
m_plugin->getMinChannelCount())
123 .arg(
m_plugin->getMaxChannelCount())
128 SVDEBUG <<
"Initialising feature extraction plugin with channels = " 129 << channelCount <<
", step = " << primaryTransform.
getStepSize()
130 <<
", block = " << primaryTransform.
getBlockSize() << endl;
132 if (!
m_plugin->initialise(channelCount,
148 if (!
m_plugin->initialise(channelCount,
152 m_message = tr(
"Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
157 m_message = tr(
"Feature extraction plugin \"%1\" rejected the given step and block sizes (%2 and %3); using plugin defaults (%4 and %5) instead")
167 m_message = tr(
"Failed to initialise feature extraction plugin \"%1\"").arg(pluginId);
173 QString pv = QString(
"%1").arg(
m_plugin->getPluginVersion());
175 QString vm = tr(
"Transform was configured for version %1 of plugin \"%2\", but the plugin being used is version %3")
187 Vamp::Plugin::OutputList outputs =
m_plugin->getOutputDescriptors();
189 if (outputs.empty()) {
190 m_message = tr(
"Plugin \"%1\" has no outputs").arg(pluginId);
196 for (
int i = 0; i < (int)outputs.size(); ++i) {
199 outputs[i].identifier ==
m_transforms[j].getOutput().toStdString()) {
201 m_descriptors.push_back(
new Vamp::Plugin::OutputDescriptor(outputs[i]));
208 m_message = tr(
"Plugin \"%1\" has no output named \"%2\"")
233 float minValue = 0.0, maxValue = 0.0;
234 bool haveExtents =
false;
253 int modelResolution = 1;
256 Vamp::Plugin::OutputDescriptor::OneSamplePerStep) {
258 cerr <<
"WARNING: plugin reports output sample rate as " 259 <<
m_descriptors[n]->sampleRate <<
" (can't display features with finer resolution than the input rate of " << input->
getSampleRate() <<
")" << endl;
265 case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
267 modelResolution = int(modelRate /
m_descriptors[n]->sampleRate + 0.001);
271 case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
275 case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
290 bool preDurationPlugin = (
m_plugin->getVampApiVersion() < 2);
303 }
else if ((preDurationPlugin && binCount > 1 &&
305 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) ||
328 bool isNoteModel =
false;
333 if (binCount > 1) isNoteModel =
true;
348 settings.beginGroup(
"Transformer");
349 bool flexi = settings.value(
"use-flexi-note-model",
false).toBool();
352 cerr <<
"flexi = " << flexi << endl;
354 if (isNoteModel && !flexi) {
359 (modelRate, modelResolution, minValue, maxValue,
false);
362 (modelRate, modelResolution,
false);
367 }
else if (isNoteModel && flexi) {
372 (modelRate, modelResolution, minValue, maxValue,
false);
375 (modelRate, modelResolution,
false);
385 (modelRate, modelResolution, minValue, maxValue,
false);
388 (modelRate, modelResolution,
false);
397 }
else if (binCount == 1 ||
399 Vamp::Plugin::OutputDescriptor::VariableSampleRate)) {
423 if (!haveBinCount || binCount > 1) {
430 (modelRate, modelResolution, minValue, maxValue,
false);
433 (modelRate, modelResolution,
false);
436 Vamp::Plugin::OutputList outputs =
m_plugin->getOutputDescriptors();
452 (modelRate, modelResolution, binCount,
457 std::vector<QString> names;
458 for (
int i = 0; i < (int)
m_descriptors[n]->binNames.size(); ++i) {
491 for (std::map<int, SparseTimeValueModel *>::iterator j =
493 j != i->second.end(); ++j) {
495 if (m) mm.push_back(m);
504 for (std::map<int, bool>::const_iterator i =
507 if (i->second)
return true;
518 std::cerr <<
"Internal error: binNo == 0 in getAdditionalModel (should be using primary model)" << std::endl;
523 if (!isOutput<SparseTimeValueModel>(n))
return 0;
526 std::cerr <<
"getAdditionalModel(" << n <<
", " << binNo <<
"): creating" << std::endl;
529 if (!baseModel)
return 0;
531 std::cerr <<
"getAdditionalModel(" << n <<
", " << binNo <<
"): (from " << baseModel <<
")" << std::endl;
555 SVDEBUG <<
"FeatureExtractionModelTransformer::getConformingInput: WARNING: Input model is not conformable to DenseTimeValueModel" << endl;
571 cerr <<
"FeatureExtractionModelTransformer::run: Waiting for input model to be ready..." << endl;
579 if ((
int)
m_plugin->getMaxChannelCount() < channelCount) {
583 float **buffers =
new float*[channelCount];
584 for (
int ch = 0; ch < channelCount; ++ch) {
585 buffers[ch] =
new float[primaryTransform.
getBlockSize() + 2];
591 bool frequencyDomain = (
m_plugin->getInputDomain() ==
592 Vamp::Plugin::FrequencyDomain);
593 std::vector<FFTModel *> fftModels;
595 if (frequencyDomain) {
596 for (
int ch = 0; ch < channelCount; ++ch) {
606 if (!model->
isOK()) {
608 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
612 throw AllocationFailed(
"Failed to create the FFT model for this feature extraction model transformer");
615 fftModels.push_back(model);
628 long contextDuration =
631 if (contextStart == 0 || contextStart < startFrame) {
632 contextStart = startFrame;
635 if (contextDuration == 0) {
636 contextDuration = endFrame - contextStart;
638 if (contextStart + contextDuration > endFrame) {
639 contextDuration = endFrame - contextStart;
642 long blockFrame = contextStart;
644 long prevCompletion = 0;
646 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
651 float *imaginaries = 0;
652 if (frequencyDomain) {
653 reals =
new float[blockSize/2 + 1];
654 imaginaries =
new float[blockSize/2 + 1];
661 if (frequencyDomain) {
662 if (blockFrame -
int(blockSize)/2 >
663 contextStart + contextDuration)
break;
666 contextStart + contextDuration)
break;
674 (((blockFrame - contextStart) / stepSize) * 99) /
675 (contextDuration / stepSize + 1);
679 if (frequencyDomain) {
680 for (
int ch = 0; ch < channelCount; ++ch) {
681 int column = (blockFrame - startFrame) / stepSize;
682 fftModels[ch]->getValuesAt(column, reals, imaginaries);
683 for (
int i = 0; i <= blockSize/2; ++i) {
684 buffers[ch][i*2] = reals[i];
685 buffers[ch][i*2+1] = imaginaries[i];
687 error = fftModels[ch]->getError();
689 cerr <<
"FeatureExtractionModelTransformer::run: Abandoning, error is " << error << endl;
695 getFrames(channelCount, blockFrame, blockSize, buffers);
700 Vamp::Plugin::FeatureSet features =
m_plugin->process
701 (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));
705 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
706 for (
int fi = 0; fi < (int)features[
m_outputNos[j]].size(); ++fi) {
707 Vamp::Plugin::Feature feature = features[
m_outputNos[j]][fi];
712 if (blockFrame == contextStart || completion > prevCompletion) {
713 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
716 prevCompletion = completion;
719 blockFrame += stepSize;
723 Vamp::Plugin::FeatureSet features =
m_plugin->getRemainingFeatures();
725 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
726 for (
int fi = 0; fi < (int)features[
m_outputNos[j]].size(); ++fi) {
727 Vamp::Plugin::Feature feature = features[
m_outputNos[j]][fi];
733 for (
int j = 0; j < (int)
m_outputNos.size(); ++j) {
737 if (frequencyDomain) {
738 for (
int ch = 0; ch < channelCount; ++ch) {
739 delete fftModels[ch];
742 delete[] imaginaries;
745 for (
int ch = 0; ch < channelCount; ++ch) {
746 delete[] buffers[ch];
753 long startFrame,
long size,
758 if (startFrame < 0) {
759 for (
int c = 0; c < channelCount; ++c) {
760 for (
int i = 0; i < size && startFrame + i < 0; ++i) {
761 buffers[c][i] = 0.0f;
764 offset = -startFrame;
766 if (size <= 0)
return;
775 if (channelCount == 1) {
778 buffers[0] + offset);
783 for (
long i = 0; i < size; ++i) {
784 buffers[0][i + offset] /= cc;
790 float **writebuf = buffers;
792 writebuf =
new float *[channelCount];
793 for (
int i = 0; i < channelCount; ++i) {
794 writebuf[i] = buffers[i] + offset;
798 got = input->
getData(0, channelCount-1, startFrame, size, writebuf);
800 if (writebuf != buffers)
delete[] writebuf;
804 for (
int c = 0; c < channelCount; ++c) {
805 buffers[c][got + offset] = 0.0;
814 const Vamp::Plugin::Feature &feature)
824 int frame = blockFrame;
827 Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
829 if (!feature.hasTimestamp) {
831 <<
"WARNING: FeatureExtractionModelTransformer::addFeature: " 832 <<
"Feature has variable sample rate but no timestamp!" 836 frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
840 Vamp::Plugin::OutputDescriptor::FixedSampleRate) {
842 if (!feature.hasTimestamp) {
845 RealTime ts(feature.timestamp.sec, feature.timestamp.nsec);
860 <<
"WARNING: FeatureExtractionModelTransformer::addFeature: " 861 <<
"Negative frame counts are not supported (frame = " << frame
862 <<
" from timestamp " << feature.timestamp
863 <<
"), dropping feature" 873 if (isOutput<SparseOneDimensionalModel>(n)) {
876 getConformingOutput<SparseOneDimensionalModel>(n);
880 (frame, feature.label.c_str()));
882 }
else if (isOutput<SparseTimeValueModel>(n)) {
885 getConformingOutput<SparseTimeValueModel>(n);
888 for (
int i = 0; i < (int)feature.values.size(); ++i) {
890 float value = feature.values[i];
892 QString label = feature.label.c_str();
893 if (feature.values.size() > 1) {
894 label = QString(
"[%1] %2").arg(i+1).arg(label);
901 if (!targetModel) targetModel = model;
910 }
else if (isOutput<FlexiNoteModel>(n) || isOutput<NoteModel>(n) || isOutput<RegionModel>(n)) {
915 if ((
int)feature.values.size() > index) {
916 value = feature.values[index++];
920 if (feature.hasDuration) {
921 duration = Vamp::RealTime::realTime2Frame(feature.duration, inputRate);
923 if ((
int)feature.values.size() > index) {
924 duration = feature.values[index++];
928 if (isOutput<FlexiNoteModel>(n)) {
930 float velocity = 100;
931 if ((
int)feature.values.size() > index) {
932 velocity = feature.values[index++];
934 if (velocity < 0) velocity = 127;
935 if (velocity > 127) velocity = 127;
942 feature.label.c_str()));
944 }
else if (isOutput<NoteModel>(n)) {
946 float velocity = 100;
947 if ((
int)feature.values.size() > index) {
948 velocity = feature.values[index++];
950 if (velocity < 0) velocity = 127;
951 if (velocity > 127) velocity = 127;
953 NoteModel *model = getConformingOutput<NoteModel>(n);
958 feature.label.c_str()));
961 RegionModel *model = getConformingOutput<RegionModel>(n);
964 if (feature.hasDuration && !feature.values.empty()) {
966 for (
int i = 0; i < (int)feature.values.size(); ++i) {
968 float value = feature.values[i];
970 QString label = feature.label.c_str();
971 if (feature.values.size() > 1) {
972 label = QString(
"[%1] %2").arg(i+1).arg(label);
983 feature.label.c_str()));
987 }
else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
990 DenseThreeDimensionalModel::Column::fromStdVector(feature.values);
993 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
1006 SVDEBUG <<
"FeatureExtractionModelTransformer::addFeature: Unknown output model type!" << endl;
1016 if (isOutput<SparseOneDimensionalModel>(n)) {
1019 getConformingOutput<SparseOneDimensionalModel>(n);
1024 }
else if (isOutput<SparseTimeValueModel>(n)) {
1027 getConformingOutput<SparseTimeValueModel>(n);
1032 }
else if (isOutput<NoteModel>(n)) {
1034 NoteModel *model = getConformingOutput<NoteModel>(n);
1039 }
else if (isOutput<FlexiNoteModel>(n)) {
1046 }
else if (isOutput<RegionModel>(n)) {
1048 RegionModel *model = getConformingOutput<RegionModel>(n);
1053 }
else if (isOutput<EditableDenseThreeDimensionalModel>(n)) {
1056 getConformingOutput<EditableDenseThreeDimensionalModel>(n);
virtual int getChannelCount() const =0
Return the number of distinct channels for this model.
virtual float getValueMaximum() const
void setRDFTypeURI(QString uri)
Set the event, feature, or signal type URI for the features contained in this model,...
virtual float getValueMinimum() const
virtual int getSampleRate() const
Return the frame rate in frames per second.
NoteModel – a concrete IntervalModel for notes.
RegionModel – a concrete IntervalModel for intervals associated with a value, which we call regions f...
FlexiNoteModel – a concrete IntervalModel for notes.
virtual int getStartFrame() const =0
Return the first audio frame spanned by the model.
An implementation of DenseThreeDimensionalModel that makes FFT data derived from a DenseTimeValueMode...
virtual int getData(int channel, int start, int count, float *buffer) const =0
Get the specified set of samples from the given channel of the model in single-precision floating-poi...
virtual void addPoint(const PointType &point)
Add a point.
virtual void setCompletion(int completion, bool update=true)
QString getOutputSignalTypeURI(QString outputId) const
virtual bool isReady(int *completion=0) const
Return true if the model has finished loading or calculating all its data, for a model that is capabl...
Time/value point type for use in a SparseModel or SparseValueModel.
virtual QString getScaleUnits() const
QString getOutputEventTypeURI(QString outputId) const
virtual void setSourceModel(Model *model)
Set the source model for this model.
virtual void setScaleUnits(QString units)
virtual void addPoint(const Point &point)
Add a point.
virtual void addPoint(const PointType &point)
Add a point.
virtual int getSampleRate() const =0
Return the frame rate in frames per second.
virtual void setBinNames(std::vector< QString > names)
Set the names of all bins.
virtual int getResolution() const
Return the number of sample frames covered by each set of bins.
Model is the base class for all data models that represent any sort of data on a time scale based on ...
virtual int getResolution() const
Base class for models containing dense two-dimensional data (value against time).
virtual bool isOK() const
Return true if the model was constructed successfully.
virtual int getEndFrame() const =0
Return the last audio frame spanned by the model.
static long realTime2Frame(const RealTime &r, unsigned int sampleRate)
Convert a RealTime into a sample frame at the given sample rate.
virtual void setCompletion(int completion, bool update=true)
QString getRDFTypeURI() const
Retrieve the event, feature, or signal type URI for the features contained in this model,...
virtual void setColumn(int x, const Column &values)
Set the entire set of bin values at the given column.
virtual bool isAbandoning() const
Query whether the model has been marked as abandoning.
RealTime represents time values to nanosecond precision with accurate arithmetic and frame-rate conve...