From c13b24251e4ed0b9e8f17fd2d3322ed5137abbfc Mon Sep 17 00:00:00 2001 From: Eric Christoffersen <46055145+ericchristoffersen@users.noreply.github.com> Date: Fri, 29 Nov 2019 21:20:42 -0800 Subject: [PATCH 1/2] Speedup RideCache Load Time Implement regex/hash based string substitution object to perform multiple substitutions in 2 passes. Speeds up athlete data load by 2x. Use QStringRef to avoid copy Fixes #3234 --- src/Core/RideDB.l | 14 +---- src/Core/Utils.cpp | 115 +++++++++++++++++++++++++++++++++++++- src/FileIO/JsonRideFile.h | 3 + src/FileIO/JsonRideFile.l | 14 +---- 4 files changed, 123 insertions(+), 23 deletions(-) diff --git a/src/Core/RideDB.l b/src/Core/RideDB.l index 7c281b585..4b10c3cc2 100644 --- a/src/Core/RideDB.l +++ b/src/Core/RideDB.l @@ -44,20 +44,12 @@ static QString unprotect(char *string) // this is a lexer string so it will be enclosed // in quotes. Lets strip those first - QString s = string2.mid(1,string2.length()-2); + QStringRef r = string2.midRef(1,string2.length()-2); // does it end with a space (to avoid token conflict) ? - if (s.endsWith(" ")) s = s.mid(0, s.length()-1); + if (r.endsWith(" ")) r = r.mid(0, r.length()-1); - // now un-escape the control characters - s.replace("\\t", "\t"); // tab - s.replace("\\n", "\n"); // newline - s.replace("\\r", "\r"); // carriage-return - s.replace("\\b", "\b"); // backspace - s.replace("\\f", "\f"); // formfeed - s.replace("\\/", "/"); // solidus - s.replace("\\\"", "\""); // quote - s.replace("\\\\", "\\"); // backslash + QString s = Utils::RidefileUnEscape(r); return s; } diff --git a/src/Core/Utils.cpp b/src/Core/Utils.cpp index 0b5003304..05e9cad1e 100644 --- a/src/Core/Utils.cpp +++ b/src/Core/Utils.cpp @@ -22,9 +22,122 @@ #include #include #include +#include namespace Utils { + +// Class for performing multiple string substitutions in a single +// pass over string. This implementation is only valid when substitutions +// will not inject additional substitution opportunities. +class StringSubstitutionizer +{ + QVector v; + QMap qm; + QRegularExpression qr; + + QString GetSubstitute(QString s) const + { + if (!qm.contains(s)) return QString(); + return qm.value(s); + } + + void BuildFindAnyRegex() + { + QString qRegexString; + + for (int i = 0; i < v.size(); i++) + { + if (i > 0) qRegexString.append("|"); + + qRegexString.append(v[i]); + } + + qr = QRegularExpression(qRegexString); + } + +protected: + + void PushSubstitution(QString regexstring, QString matchstring, QString substitute) + { + if (!qm.contains(matchstring)) + { + v.push_back(regexstring); + qm[matchstring] = substitute; + } + } + + void FinalizeInit() + { + BuildFindAnyRegex(); + } + + QRegularExpression GetFindAnyRegex() const + { + return qr; + } + +public: + + QString BuildSubstitutedString(QStringRef s) const + { + QRegularExpression qr = GetFindAnyRegex(); + + QRegularExpressionMatchIterator i = qr.globalMatch(s); + + if (!i.hasNext()) + return s.toString(); + + QString newstring; + + unsigned iCopyIdx = 0; + do + { + QRegularExpressionMatch match = i.next(); + int copysize = match.capturedStart() - iCopyIdx; + + if (copysize > 0) + newstring.append(s.mid(iCopyIdx, copysize)); + + newstring.append(GetSubstitute(match.captured())); + + iCopyIdx = (match.capturedStart() + match.captured().size()); + } while (i.hasNext()); + + int copysize = s.size() - iCopyIdx; + if (copysize > 0) + newstring.append(s.mid(iCopyIdx, copysize)); + + return newstring; + } +}; + +struct RidefileUnEscaper : public StringSubstitutionizer +{ + RidefileUnEscaper() + { + // regex match replacement + PushSubstitution("\\\\t", "\\t", "\t"); // tab + PushSubstitution("\\\\n", "\\n", "\n"); // newline + PushSubstitution("\\\\r", "\\r", "\r"); // carriage-return + PushSubstitution("\\\\b", "\\b", "\b"); // backspace + PushSubstitution("\\\\f", "\\f", "\f"); // formfeed + PushSubstitution("\\\\/", "\\/", "/"); // solidus + PushSubstitution("\\\\\"", "\\\"", "\""); // quote + PushSubstitution("\\\\\\\\", "\\\\", "\\"); // backslash + + FinalizeInit(); + } +}; + +QString RidefileUnEscape(const QStringRef s) +{ + // Static const object constructs it's search regex at load time. + static const RidefileUnEscaper s_RidefileUnescaper; + + return s_RidefileUnescaper.BuildSubstitutedString(s); +} + // when writing xml... QString xmlprotect(const QString &string) { @@ -43,7 +156,7 @@ QString xmlprotect(const QString &string) return s; } -// BEWARE: this function is tide closely to RideFile parsing +// BEWARE: this function is tied closely to RideFile parsing // DO NOT CHANGE IT UNLESS YOU KNOW WHAT YOU ARE DOING QString unprotect(const QString &buffer) { diff --git a/src/FileIO/JsonRideFile.h b/src/FileIO/JsonRideFile.h index 98e8acc6c..4eff68653 100644 --- a/src/FileIO/JsonRideFile.h +++ b/src/FileIO/JsonRideFile.h @@ -29,6 +29,9 @@ #include #define DATETIME_FORMAT "yyyy/MM/dd hh:mm:ss' UTC'" +namespace Utils { + QString RidefileUnEscape(QStringRef); +}; struct JsonFileReader : public RideFileReader { virtual RideFile *openRideFile(QFile &file, QStringList &errors, QList* = 0) const; diff --git a/src/FileIO/JsonRideFile.l b/src/FileIO/JsonRideFile.l index 6aeebc07b..4a335c5f3 100644 --- a/src/FileIO/JsonRideFile.l +++ b/src/FileIO/JsonRideFile.l @@ -44,20 +44,12 @@ static QString unprotect(char *string) // this is a lexer string so it will be enclosed // in quotes. Lets strip those first - QString s = string2.mid(1,string2.length()-2); + QStringRef r = string2.midRef(1,string2.length()-2); // does it end with a space (to avoid token conflict) ? - if (s.endsWith(" ")) s = s.mid(0, s.length()-1); + if (r.endsWith(" ")) r = r.mid(0, r.length()-1); - // now un-escape the control characters - s.replace("\\t", "\t"); // tab - s.replace("\\n", "\n"); // newline - s.replace("\\r", "\r"); // carriage-return - s.replace("\\b", "\b"); // backspace - s.replace("\\f", "\f"); // formfeed - s.replace("\\/", "/"); // solidus - s.replace("\\\"", "\""); // quote - s.replace("\\\\", "\\"); // backslash + QString s = Utils::RidefileUnEscape(r); return s; } From fa9a59d60ced1cd0baf0f1650ed3b026f3e5d13d Mon Sep 17 00:00:00 2001 From: Eric Christoffersen <46055145+ericchristoffersen@users.noreply.github.com> Date: Sun, 1 Dec 2019 13:30:58 -0800 Subject: [PATCH 2/2] Speedup critical string handling in RideCache Save --- src/Core/RideDB.y | 60 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/src/Core/RideDB.y b/src/Core/RideDB.y index 9addb8f5d..9eb4cccfb 100644 --- a/src/Core/RideDB.y +++ b/src/Core/RideDB.y @@ -426,6 +426,48 @@ static bool setup_mmp_durations() } static bool did_mmp_durations = setup_mmp_durations(); +// Fast constructon of common string. In the future we'll achieve this with a single variadic template function. +QString ConstructNameNumberString(QString s0, QString name, QString s1, double num, QString s2) +{ + QString numStr; + numStr.setNum(num, 'f', 5); + + QString m; + m.reserve(s0.length() + name.length() + s1.length() + numStr.length() + s2.length()); + + m.append(s0); + m.append(name); + m.append(s1); + m.append(numStr); + m.append(s2); + + return m; +} + +// Fast constructon of common string. In the future we'll achieve this with a single variadic template function. +QString ConstructNameNumberNumberString(QString s0, QString name, QString s1, double num0, QString s2, double num1, QString s3) +{ + QString num0Str; + num0Str.setNum(num0, 'f', 5); + + QString num1Str; + num1Str.setNum(num1, 'f', 5); + + QString m; + m.reserve(s0.length() + name.length() + s1.length() + num0Str.length() + s2.length() + num1Str.length() + s3.length()); + + m.append(s0); + m.append(name); + m.append(s1); + m.append(num0Str); + m.append(s2); + m.append(num1Str); + m.append(s3); + + return m; +} + + // save cache to disk // // if opendata is true then save in format for sending to the GC OpenData project @@ -548,12 +590,11 @@ void RideCache::save(bool opendata, QString filename) << QString("%1").arg(item->stdvariances().value(index, 0.0f), 0, 'f', 5) <<"\"]"; } else if (item->counts()[index] == 0) { // if count is 0 don't write it - stream << "\t\t\t\"" << name << "\":\"" << QString("%1").arg(item->metrics()[index], 0, 'f', 5) <<"\""; + stream << ConstructNameNumberString(QString("\t\t\t\""), name, + QString("\":\""), item->metrics()[index], QString("\"")); } else { - - // count is not 1, so lets write it - stream << "\t\t\t\"" << name << "\":[\"" << QString("%1").arg(item->metrics()[index], 0, 'f', 5) <<"\",\"" - << QString("%1").arg(item->counts()[index], 0, 'f', 5) <<"\"]"; + stream << ConstructNameNumberNumberString(QString("\t\t\t\""), name, + QString("\":[\""), item->metrics()[index], QString("\",\""), item->counts()[index], QString("\"]")); } } } @@ -782,12 +823,11 @@ void RideCache::save(bool opendata, QString filename) // if count is 0 don't write it } else if (interval->counts()[index] == 0) { - stream << "\t\t\t\t\"" << name << "\":\"" << QString("%1").arg(interval->metrics()[index], 0, 'f', 5) <<"\""; + stream << ConstructNameNumberString(QString("\t\t\t\""), name, + QString("\":\""), item->metrics()[index], QString("\"")); } else { - - // count is not 1, so lets write it - stream << "\t\t\t\t\"" << name << "\":[\"" << QString("%1").arg(interval->metrics()[index], 0, 'f', 5) <<"\",\"" - << QString("%1").arg(interval->counts()[index], 0, 'f', 5) <<"\"]"; + stream << ConstructNameNumberNumberString(QString("\t\t\t\""), name, + QString("\":[\""), item->metrics()[index], QString("\",\""), item->counts()[index], QString("\"]")); } } }