svcore  1.9
FFTModel.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "FFTModel.h"
17 #include "DenseTimeValueModel.h"
18 #include "AggregateWaveModel.h"
19 
20 #include "base/Profiler.h"
21 #include "base/Pitch.h"
22 
23 #include <algorithm>
24 
25 #include <cassert>
26 
27 #ifndef __GNUC__
28 #include <alloca.h>
29 #endif
30 
32  int channel,
33  WindowType windowType,
34  int windowSize,
35  int windowIncrement,
36  int fftSize,
37  bool polar,
38  StorageAdviser::Criteria criteria,
39  int fillFromColumn) :
41  m_server(0),
42  m_xshift(0),
43  m_yshift(0)
44 {
45  setSourceModel(const_cast<DenseTimeValueModel *>(model));
46 
47  m_server = getServer(model,
48  channel,
49  windowType,
50  windowSize,
51  windowIncrement,
52  fftSize,
53  polar,
54  criteria,
55  fillFromColumn);
56 
57  if (!m_server) return; // caller should check isOK()
58 
59  int xratio = windowIncrement / m_server->getWindowIncrement();
60  int yratio = m_server->getFFTSize() / fftSize;
61 
62  while (xratio > 1) {
63  if (xratio & 0x1) {
64  cerr << "ERROR: FFTModel: Window increment ratio "
65  << windowIncrement << " / "
67  << " must be a power of two" << endl;
68  assert(!(xratio & 0x1));
69  }
70  ++m_xshift;
71  xratio >>= 1;
72  }
73 
74  while (yratio > 1) {
75  if (yratio & 0x1) {
76  cerr << "ERROR: FFTModel: FFT size ratio "
77  << m_server->getFFTSize() << " / " << fftSize
78  << " must be a power of two" << endl;
79  assert(!(yratio & 0x1));
80  }
81  ++m_yshift;
82  yratio >>= 1;
83  }
84 }
85 
87 {
89 }
90 
91 void
93 {
94  if (m_sourceModel) {
95  cerr << "FFTModel[" << this << "]::sourceModelAboutToBeDeleted(" << m_sourceModel << ")" << endl;
96  if (m_server) {
98  m_server = 0;
99  }
101  }
102 }
103 
106  int channel,
107  WindowType windowType,
108  int windowSize,
109  int windowIncrement,
110  int fftSize,
111  bool polar,
112  StorageAdviser::Criteria criteria,
113  int fillFromColumn)
114 {
115  // Obviously, an FFT model of channel C (where C != -1) of an
116  // aggregate model is the same as the FFT model of the appropriate
117  // channel of whichever model that aggregate channel is drawn
118  // from. We should use that model here, in case we already have
119  // the data for it or will be wanting the same data again later.
120 
121  // If the channel is -1 (i.e. mixture of all channels), then we
122  // can't do this shortcut unless the aggregate model only has one
123  // channel or contains exactly all of the channels of a single
124  // other model. That isn't very likely -- if it were the case,
125  // why would we be using an aggregate model?
126 
127  if (channel >= 0) {
128 
129  const AggregateWaveModel *aggregate =
130  dynamic_cast<const AggregateWaveModel *>(model);
131 
132  if (aggregate && channel < aggregate->getComponentCount()) {
133 
135  aggregate->getComponent(channel);
136 
137  return getServer(spec.model,
138  spec.channel,
139  windowType,
140  windowSize,
141  windowIncrement,
142  fftSize,
143  polar,
144  criteria,
145  fillFromColumn);
146  }
147  }
148 
149  // The normal case
150 
151  return FFTDataServer::getFuzzyInstance(model,
152  channel,
153  windowType,
154  windowSize,
155  windowIncrement,
156  fftSize,
157  polar,
158  criteria,
159  fillFromColumn);
160 }
161 
162 int
164 {
165  return isOK() ? m_server->getModel()->getSampleRate() : 0;
166 }
167 
170 {
171  Profiler profiler("FFTModel::getColumn", false);
172 
173  Column result;
174 
175  result.clear();
176  int h = getHeight();
177  result.reserve(h);
178 
179 #ifdef __GNUC__
180  float magnitudes[h];
181 #else
182  float *magnitudes = (float *)alloca(h * sizeof(float));
183 #endif
184 
185  if (m_server->getMagnitudesAt(x << m_xshift, magnitudes)) {
186 
187  for (int y = 0; y < h; ++y) {
188  result.push_back(magnitudes[y]);
189  }
190 
191  } else {
192  for (int i = 0; i < h; ++i) result.push_back(0.f);
193  }
194 
195  return result;
196 }
197 
198 QString
200 {
201  int sr = getSampleRate();
202  if (!sr) return "";
203  QString name = tr("%1 Hz").arg((n * sr) / ((getHeight()-1) * 2));
204  return name;
205 }
206 
207 bool
208 FFTModel::estimateStableFrequency(int x, int y, float &frequency)
209 {
210  if (!isOK()) return false;
211 
212  int sampleRate = m_server->getModel()->getSampleRate();
213 
214  int fftSize = m_server->getFFTSize() >> m_yshift;
215  frequency = (float(y) * sampleRate) / fftSize;
216 
217  if (x+1 >= getWidth()) return false;
218 
219  // At frequency f, a phase shift of 2pi (one cycle) happens in 1/f sec.
220  // At hopsize h and sample rate sr, one hop happens in h/sr sec.
221  // At window size w, for bin b, f is b*sr/w.
222  // thus 2pi phase shift happens in w/(b*sr) sec.
223  // We need to know what phase shift we expect from h/sr sec.
224  // -> 2pi * ((h/sr) / (w/(b*sr)))
225  // = 2pi * ((h * b * sr) / (w * sr))
226  // = 2pi * (h * b) / w.
227 
228  float oldPhase = getPhaseAt(x, y);
229  float newPhase = getPhaseAt(x+1, y);
230 
231  int incr = getResolution();
232 
233  float expectedPhase = oldPhase + (2.0 * M_PI * y * incr) / fftSize;
234 
235  float phaseError = princargf(newPhase - expectedPhase);
236 
237 // bool stable = (fabsf(phaseError) < (1.1f * (m_windowIncrement * M_PI) / m_fftSize));
238 
239  // The new frequency estimate based on the phase error resulting
240  // from assuming the "native" frequency of this bin
241 
242  frequency =
243  (sampleRate * (expectedPhase + phaseError - oldPhase)) /
244  (2 * M_PI * incr);
245 
246  return true;
247 }
248 
250 FFTModel::getPeaks(PeakPickType type, int x, int ymin, int ymax)
251 {
252  Profiler profiler("FFTModel::getPeaks");
253 
255  if (!isOK()) return peaks;
256 
257  if (ymax == 0 || ymax > getHeight() - 1) {
258  ymax = getHeight() - 1;
259  }
260 
261  if (type == AllPeaks) {
262  int minbin = ymin;
263  if (minbin > 0) minbin = minbin - 1;
264  int maxbin = ymax;
265  if (maxbin < getHeight() - 1) maxbin = maxbin + 1;
266  const int n = maxbin - minbin + 1;
267 #ifdef __GNUC__
268  float values[n];
269 #else
270  float *values = (float *)alloca(n * sizeof(float));
271 #endif
272  getMagnitudesAt(x, values, minbin, maxbin - minbin + 1);
273  for (int bin = ymin; bin <= ymax; ++bin) {
274  if (bin == minbin || bin == maxbin) continue;
275  if (values[bin - minbin] > values[bin - minbin - 1] &&
276  values[bin - minbin] > values[bin - minbin + 1]) {
277  peaks.insert(bin);
278  }
279  }
280  return peaks;
281  }
282 
283  Column values = getColumn(x);
284 
285  float mean = 0.f;
286  for (int i = 0; i < values.size(); ++i) mean += values[i];
287  if (values.size() >0) mean /= values.size();
288 
289  // For peak picking we use a moving median window, picking the
290  // highest value within each continuous region of values that
291  // exceed the median. For pitch adaptivity, we adjust the window
292  // size to a roughly constant pitch range (about four tones).
293 
294  int sampleRate = getSampleRate();
295 
296  std::deque<float> window;
297  std::vector<int> inrange;
298  float dist = 0.5;
299 
300  int medianWinSize = getPeakPickWindowSize(type, sampleRate, ymin, dist);
301  int halfWin = medianWinSize/2;
302 
303  int binmin;
304  if (ymin > halfWin) binmin = ymin - halfWin;
305  else binmin = 0;
306 
307  int binmax;
308  if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
309  else binmax = values.size()-1;
310 
311  int prevcentre = 0;
312 
313  for (int bin = binmin; bin <= binmax; ++bin) {
314 
315  float value = values[bin];
316 
317  window.push_back(value);
318 
319  // so-called median will actually be the dist*100'th percentile
320  medianWinSize = getPeakPickWindowSize(type, sampleRate, bin, dist);
321  halfWin = medianWinSize/2;
322 
323  while ((int)window.size() > medianWinSize) {
324  window.pop_front();
325  }
326 
327  int actualSize = window.size();
328 
329  if (type == MajorPitchAdaptivePeaks) {
330  if (ymax + halfWin < values.size()) binmax = ymax + halfWin;
331  else binmax = values.size()-1;
332  }
333 
334  std::deque<float> sorted(window);
335  std::sort(sorted.begin(), sorted.end());
336  float median = sorted[int(sorted.size() * dist)];
337 
338  int centrebin = 0;
339  if (bin > actualSize/2) centrebin = bin - actualSize/2;
340 
341  while (centrebin > prevcentre || bin == binmin) {
342 
343  if (centrebin > prevcentre) ++prevcentre;
344 
345  float centre = values[prevcentre];
346 
347  if (centre > median) {
348  inrange.push_back(centrebin);
349  }
350 
351  if (centre <= median || centrebin+1 == values.size()) {
352  if (!inrange.empty()) {
353  int peakbin = 0;
354  float peakval = 0.f;
355  for (int i = 0; i < (int)inrange.size(); ++i) {
356  if (i == 0 || values[inrange[i]] > peakval) {
357  peakval = values[inrange[i]];
358  peakbin = inrange[i];
359  }
360  }
361  inrange.clear();
362  if (peakbin >= ymin && peakbin <= ymax) {
363  peaks.insert(peakbin);
364  }
365  }
366  }
367 
368  if (bin == binmin) break;
369  }
370  }
371 
372  return peaks;
373 }
374 
375 int
377  int bin, float &percentile) const
378 {
379  percentile = 0.5;
380  if (type == MajorPeaks) return 10;
381  if (bin == 0) return 3;
382 
383  int fftSize = m_server->getFFTSize() >> m_yshift;
384  float binfreq = (float(sampleRate) * bin) / fftSize;
385  float hifreq = Pitch::getFrequencyForPitch(73, 0, binfreq);
386 
387  int hibin = lrintf((hifreq * fftSize) / sampleRate);
388  int medianWinSize = hibin - bin;
389  if (medianWinSize < 3) medianWinSize = 3;
390 
391  percentile = 0.5 + (binfreq / sampleRate);
392 
393  return medianWinSize;
394 }
395 
398  int ymin, int ymax)
399 {
400  Profiler profiler("FFTModel::getPeakFrequencies");
401 
402  PeakSet peaks;
403  if (!isOK()) return peaks;
404  PeakLocationSet locations = getPeaks(type, x, ymin, ymax);
405 
406  int sampleRate = getSampleRate();
407  int fftSize = m_server->getFFTSize() >> m_yshift;
408  int incr = getResolution();
409 
410  // This duplicates some of the work of estimateStableFrequency to
411  // allow us to retrieve the phases in two separate vertical
412  // columns, instead of jumping back and forth between columns x and
413  // x+1, which may be significantly slower if re-seeking is needed
414 
415  std::vector<float> phases;
416  for (PeakLocationSet::iterator i = locations.begin();
417  i != locations.end(); ++i) {
418  phases.push_back(getPhaseAt(x, *i));
419  }
420 
421  int phaseIndex = 0;
422  for (PeakLocationSet::iterator i = locations.begin();
423  i != locations.end(); ++i) {
424  float oldPhase = phases[phaseIndex];
425  float newPhase = getPhaseAt(x+1, *i);
426  float expectedPhase = oldPhase + (2.0 * M_PI * *i * incr) / fftSize;
427  float phaseError = princargf(newPhase - expectedPhase);
428  float frequency =
429  (sampleRate * (expectedPhase + phaseError - oldPhase))
430  / (2 * M_PI * incr);
431 // bool stable = (fabsf(phaseError) < (1.1f * (incr * M_PI) / fftSize));
432 // if (stable)
433  peaks[*i] = frequency;
434  ++phaseIndex;
435  }
436 
437  return peaks;
438 }
439 
440 Model *
442 {
443  return new FFTModel(*this);
444 }
445 
448  m_server(model.m_server),
449  m_xshift(model.m_xshift),
450  m_yshift(model.m_yshift)
451 {
453 }
454 
virtual int getWidth() const
Return the number of columns of bins in the model.
Definition: FFTModel.h:105
virtual int getSampleRate() const
Return the frame rate in frames per second.
Definition: FFTModel.cpp:163
virtual bool estimateStableFrequency(int x, int y, float &frequency)
Calculate an estimated frequency for a stable signal in this bin, using phase unwrapping.
Definition: FFTModel.cpp:208
FFTDataServer * getServer(const DenseTimeValueModel *, int, WindowType, int, int, int, bool, StorageAdviser::Criteria, int)
Definition: FFTModel.cpp:105
RangeSummarisableTimeValueModel * model
An implementation of DenseThreeDimensionalModel that makes FFT data derived from a DenseTimeValueMode...
Definition: FFTModel.h:33
const DenseTimeValueModel * getModel() const
Definition: FFTDataServer.h:70
ModelChannelSpec getComponent(int c) const
Any bin exceeding its immediate neighbours.
Definition: FFTModel.h:159
WindowType
Definition: Window.h:27
int getWindowIncrement() const
Definition: FFTDataServer.h:74
bool getMagnitudesAt(int x, float *values, int minbin=0, int count=0, int step=1)
virtual Model * clone() const
Return a copy of this model.
Definition: FFTModel.cpp:441
PeakPickType
Definition: FFTModel.h:156
bool getMagnitudesAt(int x, float *values, int minbin=0, int count=0)
Definition: FFTModel.h:88
virtual void setSourceModel(Model *model)
Set the source model for this model.
Definition: Model.cpp:42
float getPhaseAt(int x, int y)
Definition: FFTModel.h:78
int m_xshift
Definition: FFTModel.h:198
FFTModel(const DenseTimeValueModel *model, int channel, WindowType windowType, int windowSize, int windowIncrement, int fftSize, bool polar, StorageAdviser::Criteria criteria=StorageAdviser::NoCriteria, int fillFromColumn=0)
Construct an FFT model derived from the given DenseTimeValueModel, with the given window parameters a...
Definition: FFTModel.cpp:31
virtual int getSampleRate() const =0
Return the frame rate in frames per second.
Model is the base class for all data models that represent any sort of data on a time scale based on ...
Definition: Model.h:35
static void modelAboutToBeDeleted(Model *)
int getPeakPickWindowSize(PeakPickType type, int sampleRate, int bin, float &percentile) const
Definition: FFTModel.cpp:376
virtual Column getColumn(int x) const
Get data from the given column of bin values.
Definition: FFTModel.cpp:169
static void claimInstance(FFTDataServer *)
static float getFrequencyForPitch(int midiPitch, float centsOffset=0, float concertA=0.0)
Return the frequency at the given MIDI pitch plus centsOffset cents (1/100ths of a semitone).
Definition: Pitch.cpp:23
static void releaseInstance(FFTDataServer *)
virtual int getResolution() const
Return the number of sample frames covered by each column of bins.
Definition: FFTModel.h:130
int getFFTSize() const
Definition: FFTDataServer.h:75
Base class for models containing dense two-dimensional data (value against time).
virtual bool isOK() const
Return true if the model was constructed successfully.
Definition: FFTModel.h:120
void sourceModelAboutToBeDeleted()
Definition: FFTModel.cpp:92
~FFTModel()
Definition: FFTModel.cpp:86
virtual QString getBinName(int n) const
Get the name of a given bin (i.e.
Definition: FFTModel.cpp:199
Model * m_sourceModel
Definition: Model.h:301
Peaks picked using sliding median window.
Definition: FFTModel.h:160
virtual int getHeight() const
Return the number of bins in each column.
Definition: FFTModel.h:108
virtual PeakSet getPeakFrequencies(PeakPickType type, int x, int ymin=0, int ymax=0)
Return locations and estimated stable frequencies of peak bins.
Definition: FFTModel.cpp:397
std::map< int, float > PeakSet
Definition: FFTModel.h:164
float princargf(float a)
Definition: System.cpp:326
int m_yshift
Definition: FFTModel.h:199
FFTDataServer * m_server
Definition: FFTModel.h:197
virtual PeakLocationSet getPeaks(PeakPickType type, int x, int ymin=0, int ymax=0)
Return locations of peak bins in the range [ymin,ymax].
Definition: FFTModel.cpp:250
std::set< int > PeakLocationSet
Definition: FFTModel.h:163
static FFTDataServer * getFuzzyInstance(const DenseTimeValueModel *model, int channel, WindowType windowType, int windowSize, int windowIncrement, int fftSize, bool polar, StorageAdviser::Criteria criteria=StorageAdviser::NoCriteria, int fillFromColumn=0)
Profile point instance class.
Definition: Profiler.h:86