Simplify linkifyMessage

Parsing HTML as XML has inherent problems; most notably, many Matrix
clients don't escape ampersands in the URLs of mx-replies, etc.
(See issue #18)

This also removes the replacement of <mx-reply> as it isn't strictly
needed.

Also, the QRegExp is replaced with the Qt5 QRegularExpression for
performance and because it supports lookahead and lookbehind.

I'm pretty sure that the original code also replaced href="" with
href=\"\", which was probably wrong, but I'm not too sure about that.

Fixes #18
This commit is contained in:
Nicolas Werner 2019-05-01 12:11:19 +02:00
parent e1457d5c7a
commit 23eef9e1bc
2 changed files with 8 additions and 61 deletions

View file

@ -1,6 +1,6 @@
#pragma once
#include <QRegExp>
#include <QRegularExpression>
#include <QString>
// Non-theme app configuration. Layouts, fonts spacing etc.
@ -51,8 +51,11 @@ constexpr auto LABEL_BIG_SIZE_RATIO = 2;
// String constants used to turn bare URLs in message text into HTML links.
namespace strings {
// Replacement template for url_regex matches: \1 is the first capture group
// (the whole matched URL), used both as the link target and the link text.
const QString url_html = "<a href=\"\\1\">\\1</a>";
// QRegExp form of the URL pattern. NOTE(review): per the commit message this is
// the definition being replaced by the QRegularExpression below; both appear
// here only because this listing is a diff without +/- markers.
const QRegExp url_regex(
"((www\\.(?!\\.)|[a-z][a-z0-9+.-]*://)[^\\s<>'\"]+[^!,\\.\\s<>'\"\\]\\)\\:])");
// QRegularExpression form of the URL pattern. A URL starts with either "www."
// (not followed by another dot) or a scheme such as "https://", continues with
// characters other than whitespace, '<', '>' and quotes, and must not end in
// trailing punctuation (! , . ] ) :), whitespace, '<', '>' or a quote.
const QRegularExpression url_regex(
// Match a URL that is not quoted, i.e. one that has no double quote directly
// before or after it. The URL itself sits in an atomic group, so the engine
// cannot backtrack to a shorter URL just to satisfy the trailing lookahead.
// vvvvvv match quote via negative lookahead/lookbehind vvvvv
// vvvv atomic match url -> fail if there is a " before or after vv
R"regex((?<!")(?>((www\.(?!\.)|[a-z][a-z0-9+.-]*://)[^\s<>'"]+[^!,\.\s<>'"\]\)\:]))(?!"))regex");
}
// Window geometry.

View file

@ -291,65 +291,9 @@ utils::linkifyMessage(const QString &body)
{
// Convert to valid XML.
auto doc = QString("<html>%1</html>").arg(body);
doc.replace(conf::strings::url_regex, conf::strings::url_html);
doc.replace("<mx-reply>", "");
doc.replace("</mx-reply>", "");
doc.replace("<br>", "<br></br>");
QXmlStreamReader xml{doc};
QString textString;
while (!xml.atEnd() && !xml.hasError()) {
auto t = xml.readNext();
switch (t) {
case QXmlStreamReader::Characters: {
auto text = xml.text().toString();
text.replace(conf::strings::url_regex, conf::strings::url_html);
textString += text;
break;
}
case QXmlStreamReader::StartDocument:
case QXmlStreamReader::EndDocument:
break;
case QXmlStreamReader::StartElement: {
if (xml.name() == "html")
break;
textString += QString("<%1").arg(xml.name().toString());
const auto attrs = xml.attributes();
for (const auto &e : attrs)
textString += QString(" %1=\"%2\"")
.arg(e.name().toString())
.arg(e.value().toString());
textString += ">";
break;
}
case QXmlStreamReader::EndElement: {
if (xml.name() == "html")
break;
textString += QString("</%1>").arg(xml.name().toString());
break;
}
default: {
break;
}
}
}
if (xml.hasError()) {
qWarning() << "error while parsing xml" << xml.errorString() << doc;
doc.replace("<html>", "");
doc.replace("</html>", "");
return doc;
}
return textString;
}
QString