svcore  1.9
CSVFormat.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Sonic Visualiser
5  An audio file viewer and annotation editor.
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2006 Chris Cannam.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include "CSVFormat.h"
17 
18 #include "base/StringBits.h"
19 
20 #include <QFile>
21 #include <QString>
22 #include <QRegExp>
23 #include <QStringList>
24 #include <QTextStream>
25 
26 #include <iostream>
27 
28 CSVFormat::CSVFormat(QString path) :
29  m_separator(""),
30  m_sampleRate(44100),
31  m_windowSize(1024),
32  m_allowQuoting(true)
33 {
34  guessFormatFor(path);
35 }
36 
// Guess the format of the CSV file at `path`, setting fields of this
// object accordingly.  (The cross-reference index of this listing names
// this definition as guessFormatFor(QString path).)
//
// Resets the column counts, the example rows and the per-column state,
// then passes the leading non-comment, non-empty lines of the file to
// guessQualities() and finally calls guessPurposes().  Returns early,
// without calling guessPurposes(), if the file does not exist or
// cannot be opened.
//
// NOTE(review): the declarator line (source line 38) is absent from
// this listing.
37 void
39 {
// NOTE(review): source lines 40-42 are absent from this listing; their
// content cannot be confirmed from here.
43 
44  m_maxExampleCols = 0;
45  m_columnCount = 0;
46  m_variableColumnCount = false;
47 
48  m_example.clear();
49  m_columnQualities.clear();
50  m_columnPurposes.clear();
51  m_prevValues.clear();
52 
53  QFile file(path);
54  if (!file.exists()) return;
55  if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
56 
57  QTextStream in(&file);
58  in.seek(0);
59 
// Number of lines passed to guessQualities() so far; also used as the
// zero-based line index handed to it.
60  int lineno = 0;
61 
62  while (!in.atEnd()) {
63 
64  // See comment about line endings in CSVFileReader::load()
65 
// What readLine() returns may still contain '\r' characters; split on
// them so that each piece is handled as a line of its own, discarding
// empty pieces.
66  QString chunk = in.readLine();
67  QStringList lines = chunk.split('\r', QString::SkipEmptyParts);
68 
69  for (int li = 0; li < lines.size(); ++li) {
70 
71  QString line = lines[li];
// Lines starting with '#' and empty lines are skipped and not counted.
72  if (line.startsWith("#") || line == "") continue;
73 
74  guessQualities(line, lineno);
75 
76  ++lineno;
77  }
78 
// The limit is tested once per readLine() chunk, so slightly more than
// 50 lines may be examined if the last chunk held several pieces.
79  if (lineno >= 50) break;
80  }
81 
82  guessPurposes();
83 }
84 
85 void
87 {
88  char candidates[] = { ',', '\t', ' ', '|', '/', ':' };
89  for (int i = 0; i < int(sizeof(candidates)/sizeof(candidates[0])); ++i) {
90  if (StringBits::split(line, candidates[i], m_allowQuoting).size() >= 2) {
91  m_separator = candidates[i];
92  return;
93  }
94  }
95  m_separator = " ";
96 }
97 
// Update the per-column quality flags (numeric / integral / increasing /
// large) from one line of the file.
//
// line:   the text of the line to examine.
// lineno: zero-based index of this line among the lines examined so far
//         (the caller starts at 0 and increments after each call).
//
// Also maintains m_columnCount, m_variableColumnCount, m_prevValues and,
// for the first ten lines, the example rows in m_example together with
// m_maxExampleCols.
98 void
99 CSVFormat::guessQualities(QString line, int lineno)
100 {
// The separator is guessed lazily, from the first line seen.
101  if (m_separator == "") guessSeparator(line);
102 
103  QStringList list = StringBits::split(line, m_separator[0], m_allowQuoting);
104 
// m_columnCount tracks the smallest column count seen so far.
// NOTE(review): when a line has fewer columns than m_columnCount, the
// first statement lowers m_columnCount to cols before the second one
// compares them, so m_variableColumnCount is only ever set by a line
// with MORE columns than the current minimum -- confirm this is intended.
105  int cols = list.size();
106  if (lineno == 0 || (cols < m_columnCount)) m_columnCount = cols;
107  if (cols != m_columnCount) m_variableColumnCount = true;
108 
109  // All columns are regarded as having these qualities until we see
110  // something that indicates otherwise:
111 
// NOTE(review): the initializer expression of defaultQualities (source
// line 113) is absent from this listing.
112  ColumnQualities defaultQualities =
114 
115  for (int i = 0; i < cols; ++i) {
116 
// Grow the per-column lists so that column i has an entry, starting
// from the default qualities and a previous value of zero.
117  while (m_columnQualities.size() <= i) {
118  m_columnQualities.push_back(defaultQualities);
119  m_prevValues.push_back(0.f);
120  }
121 
122  QString s(list[i]);
123  bool ok = false;
124 
125  ColumnQualities qualities = m_columnQualities[i];
126 
// Unpack the flags recorded for this column so far.
127  bool numeric = (qualities & ColumnNumeric);
128  bool integral = (qualities & ColumnIntegral);
129  bool increasing = (qualities & ColumnIncreasing);
130  bool large = (qualities & ColumnLarge); // this one defaults to off
131 
132  float value = 0.f;
133 
// NOTE(review): source line 134 is absent from this listing.
135 
// A column stays numeric only while every field in it parses as a
// number: first via QString::toFloat(), then via
// StringBits::stringToDoubleLocaleFree() as a fallback.
136  if (numeric) {
137  value = s.toFloat(&ok);
138  if (!ok) {
139  value = (float)StringBits::stringToDoubleLocaleFree(s, &ok);
140  }
141  if (ok) {
// "Large" can only be switched on by one of the first two lines.
142  if (lineno < 2 && value > 1000.f) large = true;
143  } else {
144  numeric = false;
145  }
146  }
147 
148  if (numeric) {
149 
// Any '.' or ',' in the text rules out an integral column.
150  if (integral) {
151  if (s.contains('.') || s.contains(',')) {
152  integral = false;
153  }
154  }
155 
// "Increasing" means strictly greater than the previous numeric value
// in this column; the first line has nothing to compare against.
156  if (increasing) {
157  if (lineno > 0 && value <= m_prevValues[i]) {
158  increasing = false;
159  }
160  }
161 
162  m_prevValues[i] = value;
163  }
164 
// Repack the (possibly reduced) flags for this column.
165  m_columnQualities[i] =
166  (numeric ? ColumnNumeric : 0) |
167  (integral ? ColumnIntegral : 0) |
168  (increasing ? ColumnIncreasing : 0) |
169  (large ? ColumnLarge : 0);
170  }
171 
// Keep the first ten lines as example rows and record the widest of them.
172  if (lineno < 10) {
173  m_example.push_back(list);
174  if (lineno == 0 || cols > m_maxExampleCols) {
175  m_maxExampleCols = cols;
176  }
177  }
178 
179 // cerr << "Estimated column qualities: ";
180 // for (int i = 0; i < m_columnCount; ++i) {
181 // cerr << int(m_columnQualities[i]) << " ";
182 // }
183 // cerr << endl;
184 }
185 
// Assign a purpose to each of the first m_columnCount columns, based on
// the quality flags in m_columnQualities.  (The cross-reference index
// of this listing names this definition as guessPurposes().)
//
// NOTE(review): the declarator line (source line 187) and a number of
// interior lines are absent from this listing; each gap is marked below.
// The commented-out diagnostics at the end print m_modelType,
// m_timingType and m_timeUnits, so presumably the missing lines assign
// those members -- confirm against the original file.
186 void
188 {
// NOTE(review): source lines 189-190 are absent from this listing.
191 
192  int timingColumnCount = 0;
193 
194  for (int i = 0; i < m_columnCount; ++i) {
195 
196  ColumnPurpose purpose = ColumnUnknown;
197  bool primary = (i == 0);
198 
199  ColumnQualities qualities = m_columnQualities[i];
200 
201  bool numeric = (qualities & ColumnNumeric);
202  bool integral = (qualities & ColumnIntegral);
203  bool increasing = (qualities & ColumnIncreasing);
204  bool large = (qualities & ColumnLarge);
205 
// A column counts as a timing column if it is numeric and increasing.
206  bool timingColumn = (numeric && increasing);
207 
208  if (timingColumn) {
209 
210  ++timingColumnCount;
211 
// Only the first column of the file can become the start time.
212  if (primary) {
213 
214  purpose = ColumnStartTime;
215 
// NOTE(review): source line 216 is absent from this listing.
217 
// NOTE(review): the bodies of both branches below (source lines 219
// and 221) are absent from this listing.
218  if (integral && large) {
220  } else {
222  }
223 
224  } else {
225 
// The second timing column found is taken as the end time, but only
// when the timing type is explicit.
226  if (timingColumnCount == 2 && m_timingType == ExplicitTiming) {
227  purpose = ColumnEndTime;
228  }
229  }
230  }
231 
// Anything not identified as a time column is a value if numeric,
// otherwise a label.
232  if (purpose == ColumnUnknown) {
233  if (numeric) {
234  purpose = ColumnValue;
235  } else {
236  purpose = ColumnLabel;
237  }
238  }
239 
240  setColumnPurpose(i, purpose);
241  }
242 
243  int valueCount = 0;
244  for (int i = 0; i < m_columnCount; ++i) {
245  if (m_columnPurposes[i] == ColumnValue) ++valueCount;
246  }
247 
248  if (valueCount == 2 && timingColumnCount == 1) {
249  // If we have exactly two apparent value columns and only one
250  // timing column, but one value column is integral and the
251  // other is not, guess that whichever one matches the integral
252  // status of the time column is either duration or end time
253  if (m_timingType == ExplicitTiming) {
// a and b become the indices of the two value columns, in column order.
254  int a = -1, b = -1;
255  for (int i = 0; i < m_columnCount; ++i) {
256  if (m_columnPurposes[i] == ColumnValue) {
257  if (a == -1) a = i;
258  else b = i;
259  }
260  }
// NOTE(review): the right-hand side of this comparison (source line
// 262) is absent from this listing.
261  if ((m_columnQualities[a] & ColumnIntegral) !=
263  int timecol = a;
// NOTE(review): the right-hand side of this comparison (source line
// 265) is absent from this listing.
264  if ((m_columnQualities[a] & ColumnIntegral) !=
266  timecol = b;
267  }
268  if (m_columnQualities[timecol] & ColumnIncreasing) {
269  // This shouldn't happen; should have been settled above
270  m_columnPurposes[timecol] = ColumnEndTime;
271  } else {
272  m_columnPurposes[timecol] = ColumnDuration;
273  }
// One of the two value columns has been reclassified as a time column.
274  --valueCount;
275  }
276  }
277  }
278 
// NOTE(review): the bodies of all four branches below (source lines
// 280, 283, 285 and 287) are absent from this listing.
279  if (timingColumnCount > 1) {
281  } else {
282  if (valueCount == 0) {
284  } else if (valueCount == 1) {
286  } else {
288  }
289  }
290 
291 // cerr << "Estimated column purposes: ";
292 // for (int i = 0; i < m_columnCount; ++i) {
293 // cerr << int(m_columnPurposes[i]) << " ";
294 // }
295 // cerr << endl;
296 
297 // cerr << "Estimated model type: " << m_modelType << endl;
298 // cerr << "Estimated timing type: " << m_timingType << endl;
299 // cerr << "Estimated units: " << m_timeUnits << endl;
300 }
301 
// Return the purpose recorded for column i, first extending
// m_columnPurposes with ColumnUnknown entries until index i exists.
// NOTE(review): the return-type and declarator lines (source lines
// 302-303) are absent from this listing; the cross-reference index
// gives "ColumnPurpose getColumnPurpose(int i)" for source line 303.
304 {
305  while (m_columnPurposes.size() <= i) {
306  m_columnPurposes.push_back(ColumnUnknown);
307  }
308  return m_columnPurposes[i];
309 }
310 
// Return the purpose recorded for column i, or ColumnUnknown if i lies
// beyond the end of m_columnPurposes.  Unlike the definition above,
// this one does not extend the list.
// NOTE(review): the return-type and declarator lines (source lines
// 311-312) are absent from this listing and are not in its index;
// presumably this is a const overload of getColumnPurpose -- confirm.
313 {
314  if (m_columnPurposes.size() <= i) {
315  return ColumnUnknown;
316  }
317  return m_columnPurposes[i];
318 }
319 
320 void
322 {
323  while (m_columnPurposes.size() <= i) {
324  m_columnPurposes.push_back(ColumnUnknown);
325  }
326  m_columnPurposes[i] = p;
327 }
328 
329 
330 
331 
CSVFormat()
Definition: CSVFormat.h:62
int m_maxExampleCols
Definition: CSVFormat.h:140
void guessSeparator(QString line)
Definition: CSVFormat.cpp:86
int m_columnCount
Definition: CSVFormat.h:129
void guessFormatFor(QString path)
Guess the format of the given CSV file, setting the fields in this object accordingly.
Definition: CSVFormat.cpp:38
static QStringList split(QString s, QChar separator, bool quoted)
Split a string at the given separator character.
Definition: StringBits.cpp:201
QList< float > m_prevValues
Definition: CSVFormat.h:135
ColumnPurpose getColumnPurpose(int i)
Definition: CSVFormat.cpp:303
QList< ColumnPurpose > m_columnPurposes
Definition: CSVFormat.h:133
void guessQualities(QString line, int lineno)
Definition: CSVFormat.cpp:99
void guessPurposes()
Definition: CSVFormat.cpp:187
ModelType m_modelType
Definition: CSVFormat.h:122
TimingType m_timingType
Definition: CSVFormat.h:123
QList< QStringList > m_example
Definition: CSVFormat.h:139
void setColumnPurpose(int i, ColumnPurpose p)
Definition: CSVFormat.cpp:321
bool m_allowQuoting
Definition: CSVFormat.h:137
unsigned int ColumnQualities
Definition: CSVFormat.h:60
QString m_separator
Definition: CSVFormat.h:125
bool m_variableColumnCount
Definition: CSVFormat.h:130
TimeUnits m_timeUnits
Definition: CSVFormat.h:124
static double stringToDoubleLocaleFree(QString s, bool *ok=0)
Convert a string to a double using basic "C"-locale syntax, i.e. with '.' as the decimal point regardless of the current locale.
Definition: StringBits.cpp:24
QList< ColumnQualities > m_columnQualities
Definition: CSVFormat.h:132