svcore  1.9
CSVFileReader.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "CSVFileReader.h"
17 
18 #include "model/Model.h"
19 #include "base/RealTime.h"
20 #include "base/StringBits.h"
24 #include "model/RegionModel.h"
25 #include "model/NoteModel.h"
26 #include "DataFileReaderFactory.h"
27 
28 #include <QFile>
29 #include <QString>
30 #include <QRegExp>
31 #include <QStringList>
32 #include <QTextStream>
33 
34 #include <iostream>
35 #include <map>
36 
38  int mainModelSampleRate) :
39  m_format(format),
40  m_file(0),
41  m_warnings(0),
42  m_mainModelSampleRate(mainModelSampleRate)
43 {
// Constructor: tries to open `path` read-only in text mode.
// On success m_file is left pointing at the open QFile. On failure m_error
// receives a translated message and m_file is reset to 0.
// NOTE(review): the first line of the signature (embedded line 37) is
// absent from this listing; presumably it is
// "CSVFileReader::CSVFileReader(QString path, CSVFormat format," -- confirm.
44  m_file = new QFile(path);
45  bool good = false;
46 
// Distinguish "file missing" from "file exists but cannot be opened" so the
// stored error message says which one happened.
47  if (!m_file->exists()) {
48  m_error = QFile::tr("File \"%1\" does not exist").arg(path);
49  } else if (!m_file->open(QIODevice::ReadOnly | QIODevice::Text)) {
50  m_error = QFile::tr("Failed to open file \"%1\"").arg(path);
51  } else {
52  good = true;
53  }
54 
// Release the QFile on failure; a null m_file is how the rest of the class
// recognises that there is nothing to read.
55  if (!good) {
56  delete m_file;
57  m_file = 0;
58  }
59 }
60 
62 {
// Destructor: closes the file if one is held, then releases the QFile.
// m_file may be 0 here (the constructor resets it when opening fails);
// deleting a null pointer is a no-op, so only close() needs the guard.
// NOTE(review): the line naming this function (embedded line 61) is absent
// from this listing.
63  SVDEBUG << "CSVFileReader::~CSVFileReader: file is " << m_file << endl;
64 
65  if (m_file) {
// The message below now names the destructor; it previously named the
// constructor, which made debug logs misleading.
66  SVDEBUG << "CSVFileReader::~CSVFileReader: Closing file" << endl;
67  m_file->close();
68  }
69  delete m_file;
70 }
71 
// Reports whether this reader holds a file: true when m_file is non-null.
// NOTE(review): the line naming this function (embedded line 73) is absent
// from this listing; presumably "CSVFileReader::isOK() const" -- confirm.
72 bool
74 {
75  return (m_file != 0);
76 }
77 
// Returns the stored error message (m_error) as it currently stands.
// NOTE(review): the line naming this function (embedded line 79) is absent
// from this listing; presumably "CSVFileReader::getError() const" -- confirm.
78 QString
80 {
81  return m_error;
82 }
83 
// Convert the text of one time field into a frame count.
//   s          - raw field text; every character other than a digit, 'e',
//                'E', '.', ',', '+' or '-' is stripped before parsing
//   lineno     - index of the data line, printed as lineno+1 in warnings
//   sampleRate - multiplier used when the time units are seconds
//   windowSize - multiplier used when the time units are windows
// Returns the computed frame (calculatedFrame, which starts at 0). When
// parsing fails a warning is written to cerr and m_warnings is incremented.
// NOTE(review): timeUnits is used below but its declaration (embedded line
// 91) is absent from this listing; presumably it is read from
// m_format.getTimeUnits() -- confirm against the full source.
84 int
85 CSVFileReader::convertTimeValue(QString s, int lineno, int sampleRate,
86  int windowSize) const
87 {
88  QRegExp nonNumericRx("[^0-9eE.,+-]");
89  int warnLimit = 10;
90 
92 
93  int calculatedFrame = 0;
94 
95  bool ok = false;
96  QString numeric = s;
97  numeric.remove(nonNumericRx);
98 
99  if (timeUnits == CSVFormat::TimeSeconds) {
100 
// Try QString's own conversion first and fall back to the locale-free
// parser only if that fails; `ok` reflects whichever attempt ran last.
101  double time = numeric.toDouble(&ok);
102  if (!ok) time = StringBits::stringToDoubleLocaleFree(numeric, &ok);
// Seconds to frames: add 0.5 and truncate to int.
103  calculatedFrame = int(time * sampleRate + 0.5);
104 
105  } else {
106 
// Non-seconds units: the field is an integer count. A negative value is
// ignored and calculatedFrame stays at 0.
107  long n = numeric.toLong(&ok);
108  if (n >= 0) calculatedFrame = n;
109 
// A count of windows is scaled to frames by the window size.
110  if (timeUnits == CSVFormat::TimeWindows) {
111  calculatedFrame *= windowSize;
112  }
113  }
114 
// Report a bad field for the first warnLimit failures, print a single
// "Too many warnings" notice on the next one, then stay silent. The counter
// keeps increasing either way.
115  if (!ok) {
116  if (m_warnings < warnLimit) {
117  cerr << "WARNING: CSVFileReader::load: "
118  << "Bad time format (\"" << s
119  << "\") in data line "
120  << lineno+1 << endl;
121  } else if (m_warnings == warnLimit) {
122  cerr << "WARNING: Too many warnings" << endl;
123  }
124  ++m_warnings;
125  }
126 
127  return calculatedFrame;
128 }
129 
// Read the file and build the data model selected by the CSV format.
// Returns 0 immediately when no file is held (m_file is null). Otherwise
// returns `model`, which is only created once the first non-comment line is
// seen, so it is still 0 for a file with no such line.
// NOTE(review): this listing has gaps in its embedded line numbering (131,
// 135-137, 161, 187, 219, 224, 229, 234, 239-240, 244, 261, 265, ...). The
// function-name line, the declarations of modelType, timingType, timeUnits,
// model3, purpose, values and pointMap, and every "case" label are among the
// lines not shown -- confirm against the full source before editing.
130 Model *
132 {
133  if (!m_file) return 0;
134 
138  int sampleRate = m_format.getSampleRate();
139  int windowSize = m_format.getWindowSize();
140  QChar separator = m_format.getSeparator();
141  bool allowQuoting = m_format.getAllowQuoting();
142 
// With explicit timing the window size taken from the format is replaced,
// and for times given in seconds the main model's sample rate is used.
143  if (timingType == CSVFormat::ExplicitTiming) {
144  if (modelType == CSVFormat::ThreeDimensionalModel) {
145  // This will be overridden later if more than one line
146  // appears in our file, but we want to choose a default
147  // that's likely to be visible
148  windowSize = 1024;
149  } else {
150  windowSize = 1;
151  }
152  if (timeUnits == CSVFormat::TimeSeconds) {
153  sampleRate = m_mainModelSampleRate;
154  }
155  }
156 
// One typed pointer per model kind; whichever is created is also stored in
// `model`, which is what gets returned.
157  SparseOneDimensionalModel *model1 = 0;
158  SparseTimeValueModel *model2 = 0;
159  RegionModel *model2a = 0;
160  NoteModel *model2b = 0;
162  Model *model = 0;
163 
// Always start reading from the beginning of the file.
164  QTextStream in(m_file);
165  in.seek(0);
166 
// Local warning counter for non-numeric values in the three-dimensional
// branch below; this is separate from the m_warnings member.
167  unsigned int warnings = 0, warnLimit = 10;
168  unsigned int lineno = 0;
169 
170  float min = 0.0, max = 0.0;
171 
172  int frameNo = 0;
173  int duration = 0;
174  int endFrame = 0;
175 
176  bool haveAnyValue = false;
177  bool haveEndTime = false;
178  bool pitchLooksLikeMIDI = true;
179 
180  int startFrame = 0; // for calculation of dense model resolution
181  bool firstEverValue = true;
182 
// Number of times each label text was seen in a label column.
183  std::map<QString, int> labelCountMap;
184 
// Count the columns that qualify as value columns.
// NOTE(review): the condition that opens the inner block (embedded line 187)
// is not shown; presumably it tests the column purpose -- confirm.
185  int valueColumns = 0;
186  for (int i = 0; i < m_format.getColumnCount(); ++i) {
188  ++valueColumns;
189  }
190  }
191 
192  while (!in.atEnd()) {
193 
194  // QTextStream's readLine doesn't cope with old-style Mac
195  // CR-only line endings. Why did they bother making the class
196  // cope with more than one sort of line ending, if it still
197  // can't be configured to cope with all the common sorts?
198 
199  // For the time being we'll deal with this case (which is
200  // relatively uncommon for us, but still necessary to handle)
201  // by reading the entire file using a single readLine, and
202  // splitting it. For LF and CR/LF line endings this will just
203  // read a line at a time, and that's obviously OK.
204 
205  QString chunk = in.readLine();
206  QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
207 
208  for (int li = 0; li < lines.size(); ++li) {
209 
210  QString line = lines[li];
211 
// Lines beginning with '#' are skipped and are not counted in lineno.
212  if (line.startsWith("#")) continue;
213 
214  QStringList list = StringBits::split(line, separator, allowQuoting);
// The model is created on the first data line, according to modelType
// (the case labels of this switch are not shown in the listing).
215  if (!model) {
216 
217  switch (modelType) {
218 
220  model1 = new SparseOneDimensionalModel(sampleRate, windowSize);
221  model = model1;
222  break;
223 
225  model2 = new SparseTimeValueModel(sampleRate, windowSize, false);
226  model = model2;
227  break;
228 
230  model2a = new RegionModel(sampleRate, windowSize, false);
231  model = model2a;
232  break;
233 
235  model2b = new NoteModel(sampleRate, windowSize, false);
236  model = model2b;
237  break;
238 
241  (sampleRate,
242  windowSize,
243  valueColumns,
245  model = model3;
246  break;
247  }
248  }
249 
// Per-line state, reset for every data line.
250  float value = 0.f;
251  float pitch = 0.f;
252  QString label = "";
253 
// NOTE(review): duration is declared int above; the float literal converts to 0.
254  duration = 0.f;
255  haveEndTime = false;
256 
// Interpret each field according to the purpose of its column (the line
// that obtains `purpose` and the case labels are not shown in the listing).
257  for (int i = 0; i < list.size(); ++i) {
258 
259  QString s = list[i];
260 
262 
263  switch (purpose) {
264 
266  break;
267 
269  frameNo = convertTimeValue(s, lineno, sampleRate, windowSize);
270  break;
271 
273  endFrame = convertTimeValue(s, lineno, sampleRate, windowSize);
274  haveEndTime = true;
275  break;
276 
278  duration = convertTimeValue(s, lineno, sampleRate, windowSize);
279  break;
280 
282  value = s.toFloat();
283  haveAnyValue = true;
284  break;
285 
// A pitch outside 0..127 means the column is not treated as MIDI pitch;
// this decides the scale units set after the read loop.
287  pitch = s.toFloat();
288  if (pitch < 0.f || pitch > 127.f) {
289  pitchLooksLikeMIDI = false;
290  }
291  break;
292 
294  label = s;
295  ++labelCountMap[label];
296  break;
297  }
298  }
299 
// An end time overrides the duration only when it lies after the start frame.
300  if (haveEndTime) { // ... calculate duration now all cols read
301  if (endFrame > frameNo) {
302  duration = endFrame - frameNo;
303  }
304  }
305 
// Add one point (or one column, for the three-dimensional model) per data line.
306  if (modelType == CSVFormat::OneDimensionalModel) {
307 
308  SparseOneDimensionalModel::Point point(frameNo, label);
309  model1->addPoint(point);
310 
311  } else if (modelType == CSVFormat::TwoDimensionalModel) {
312 
313  SparseTimeValueModel::Point point(frameNo, value, label);
314  model2->addPoint(point);
315 
316  } else if (modelType == CSVFormat::TwoDimensionalModelWithDuration) {
317 
318  RegionModel::Point point(frameNo, value, duration, label);
319  model2a->addPoint(point);
320 
321  } else if (modelType == CSVFormat::TwoDimensionalModelWithDurationAndPitch) {
322 
// The value is used as the note level only when it lies in [0, 1];
// anything else is replaced by 1.
323  float level = ((value >= 0.f && value <= 1.f) ? value : 1.f);
324  NoteModel::Point point(frameNo, pitch, duration, level, label);
325  model2b->addPoint(point);
326 
327  } else if (modelType == CSVFormat::ThreeDimensionalModel) {
328 
330 
// Collect this line's bin values. The condition guarding the `continue`
// (embedded line 333) is not shown in the listing.
331  for (int i = 0; i < list.size(); ++i) {
332 
334  continue;
335  }
336 
337  bool ok = false;
338  float value = list[i].toFloat(&ok);
339 
340  values.push_back(value);
341 
// Track the overall value range; the very first value seeds both bounds.
342  if (firstEverValue || value < min) min = value;
343  if (firstEverValue || value > max) max = value;
344 
// The first value fixes the model's start frame. On the second data line
// (lineno == 1) with explicit timing, the resolution is set to the frame
// distance from that start.
345  if (firstEverValue) {
346  startFrame = frameNo;
347  model3->setStartFrame(startFrame);
348  } else if (lineno == 1 &&
349  timingType == CSVFormat::ExplicitTiming) {
350  model3->setResolution(frameNo - startFrame);
351  }
352 
353  firstEverValue = false;
354 
// Warn about at most warnLimit non-numeric values; the counter stops at the
// limit and the branch for the limit itself prints nothing (commented out).
355  if (!ok) {
356  if (warnings < warnLimit) {
357  cerr << "WARNING: CSVFileReader::load: "
358  << "Non-numeric value \""
359  << list[i]
360  << "\" in data line " << lineno+1
361  << ":" << endl;
362  cerr << line << endl;
363  ++warnings;
364  } else if (warnings == warnLimit) {
365 // cerr << "WARNING: Too many warnings" << endl;
366  }
367  }
368  }
369 
370 // SVDEBUG << "Setting bin values for count " << lineno << ", frame "
371 // << frameNo << ", time " << RealTime::frame2RealTime(frameNo, sampleRate) << endl;
372 
// The column index is the data-line index.
373  model3->setColumn(lineno, values);
374  }
375 
// Under implicit timing (or for a line with no fields) the frame advances
// by one window per data line.
376  ++lineno;
377  if (timingType == CSVFormat::ImplicitTiming ||
378  list.size() == 0) {
379  frameNo += windowSize;
380  }
381  }
382  }
383 
// If no value column was ever read and a region model was built, derive
// region values from the labels instead.
384  if (!haveAnyValue) {
385  if (model2a) {
386  // assign values for regions based on label frequency; we
387  // have this in our labelCountMap, sort of
388 
// Regroup the labels by how often each occurred: count -> (label -> value).
389  std::map<int, std::map<QString, float> > countLabelValueMap;
390  for (std::map<QString, int>::iterator i = labelCountMap.begin();
391  i != labelCountMap.end(); ++i) {
392  countLabelValueMap[i->second][i->first] = 0.f;
393  }
394 
// Walk the counts from highest to lowest, handing out 0, 1, 2, ... so the
// most frequent labels receive the smallest values.
395  float v = 0.f;
396  for (std::map<int, std::map<QString, float> >::iterator i =
397  countLabelValueMap.end(); i != countLabelValueMap.begin(); ) {
398  --i;
399  for (std::map<QString, float>::iterator j = i->second.begin();
400  j != i->second.end(); ++j) {
401  j->second = v;
402  v = v + 1.f;
403  }
404  }
405 
// Build a replacement for every existing point, carrying the value assigned
// to its label (the declaration of pointMap is not shown in the listing).
408  for (RegionModel::PointList::const_iterator i =
409  model2a->getPoints().begin();
410  i != model2a->getPoints().end(); ++i) {
411  RegionModel::Point p(*i);
412  v = countLabelValueMap[labelCountMap[p.label]][p.label];
413  RegionModel::Point pp(p.frame, v, p.duration, p.label);
414  pointMap[p] = pp;
415  }
416 
// Swap each original point for its replacement in a second pass, so the
// model is not modified during the loop above that reads its points.
417  for (std::map<RegionModel::Point, RegionModel::Point>::iterator i =
418  pointMap.begin(); i != pointMap.end(); ++i) {
419  model2a->deletePoint(i->first);
420  model2a->addPoint(i->second);
421  }
422  }
423  }
424 
// Note models are labelled as MIDI pitch only if every pitch read was
// within 0..127; otherwise the units are Hz.
425  if (model2b) {
426  if (pitchLooksLikeMIDI) {
427  model2b->setScaleUnits("MIDI Pitch");
428  } else {
429  model2b->setScaleUnits("Hz");
430  }
431  }
432 
// Publish the observed value range on the three-dimensional model.
433  if (model3) {
434  model3->setMinimumLevel(min);
435  model3->setMaximumLevel(max);
436  }
437 
438  return model;
439 }
440 
virtual void setMinimumLevel(float sz)
Set the minimum value of the value in a bin.
bool getAllowQuoting() const
Definition: CSVFormat.h:92
virtual void deletePoint(const PointType &point)
Remove a point.
static QStringList split(QString s, QChar separator, bool quoted)
Split a string at the given separator character.
Definition: StringBits.cpp:201
int m_mainModelSampleRate
Definition: CSVFileReader.h:43
NoteModel – a concrete IntervalModel for notes.
Definition: NoteModel.h:38
QString m_error
Definition: CSVFileReader.h:41
RegionModel – a concrete IntervalModel for intervals associated with a value, which we call regions f...
Definition: RegionModel.h:36
CSVFormat m_format
Definition: CSVFileReader.h:39
virtual bool isOK() const
Return true if the file appears to be of the correct type.
int convertTimeValue(QString, int lineno, int sampleRate, int windowSize) const
ColumnPurpose getColumnPurpose(int i)
Definition: CSVFormat.cpp:303
virtual void addPoint(const PointType &point)
Add a point.
Definition: SparseModel.h:704
virtual SparseValueModel< PointType >::PointList getPoints(long start, long end) const
PointTypes have a duration, so this returns all points that span any of the given range (as well as t...
ModelType getModelType() const
Definition: CSVFormat.h:86
CSVFileReader(QString path, CSVFormat format, int mainModelSampleRate)
int duration
Definition: RegionModel.h:48
QFile * m_file
Definition: CSVFileReader.h:40
Time/value point type for use in a SparseModel or SparseValueModel.
int getSampleRate() const
Definition: CSVFormat.h:89
virtual QString getError() const
TimingType getTimingType() const
Definition: CSVFormat.h:87
virtual void setScaleUnits(QString units)
virtual void addPoint(const Point &point)
Add a point.
Definition: RegionModel.h:206
QChar getSeparator() const
Definition: CSVFormat.h:93
virtual void setStartFrame(int)
Set the frame offset of the first column.
virtual void addPoint(const PointType &point)
Add a point.
Model is the base class for all data models that represent any sort of data on a time scale based on ...
Definition: Model.h:35
virtual void setMaximumLevel(float sz)
Set the maximum value of the value in a bin.
#define SVDEBUG
Definition: Debug.h:42
QString label
Definition: RegionModel.h:49
virtual Model * load() const
Read the file and return the corresponding data model.
long frame
Definition: RegionModel.h:46
TimeUnits getTimeUnits() const
Definition: CSVFormat.h:88
virtual void setResolution(int sz)
Set the number of sample frames covered by each set of bins.
virtual ~CSVFileReader()
int getWindowSize() const
Definition: CSVFormat.h:90
int getColumnCount() const
Definition: CSVFormat.h:91
virtual void setColumn(int x, const Column &values)
Set the entire set of bin values at the given column.
static double stringToDoubleLocaleFree(QString s, bool *ok=0)
Convert a string to a double using basic "C"-locale syntax, i.e.
Definition: StringBits.cpp:24