mirror of
https://github.com/Nheko-Reborn/nheko.git
synced 2024-11-29 06:08:48 +03:00
Merge pull request #1347 from mauke/url-regex-war-crime
Allow nested ()/[] brackets in URLs (fixes #1346)
This commit is contained in:
commit
17331fcf83
1 changed file with 45 additions and 5 deletions
50
src/Config.h
50
src/Config.h
|
@ -26,11 +26,51 @@ constexpr auto LABEL_MEDIUM_SIZE_RATIO = 1.3;
|
||||||
namespace strings {
|
namespace strings {
|
||||||
const QString url_html = QStringLiteral("<a href=\"\\1\">\\1</a>");
|
// HTML anchor template: the placeholder \1 is used both as the href value and as the link text.
// NOTE(review): presumably \1 is substituted with capture group 1 of url_regex - confirm at the call site.
const QString url_html = QStringLiteral("<a href=\"\\1\">\\1</a>");
|
||||||
const QRegularExpression url_regex(
|
const QRegularExpression url_regex(
|
||||||
// match an URL, that is not quoted, i.e.
|
// match an unquoted URL
// The pattern string is produced by an immediately invoked lambda that glues
// together the named sub-patterns declared below. Capture group 1 spans the
// whole URL; path, query and fragment each open a further capture group that
// exists only as a target for the relative recursion (?-1).
|
||||||
// vvvvvv match quote via negative lookahead/lookbehind vv
|
[](){
|
||||||
// vvvv atomic match url -> fail if there is a " before or after vvv
|
const auto
|
||||||
QStringLiteral(
|
// general_unicode: any character outside the ASCII range that is not a control
// character, whitespace or punctuation, plus the code points U+2010 to U+2015.
general_unicode = QStringLiteral(R"((?:[^\x{0}-\x{7f}\p{Cc}\s\p{P}]|[\x{2010}\x{2011}\x{2012}\x{2013}\x{2014}\x{2015}]))"),
|
||||||
R"((?<!["'])(?>((www\.(?!\.)|[a-z][a-z0-9+.-]*://)[^\s<>'"]+[^!,\.\s<>'"\]\)\:]))(?!["']))"));
|
// protocol: http or https, each letter accepted in either case.
protocol = QStringLiteral(R"((?:[Hh][Tt][Tt][Pp][Ss]?))"),
|
||||||
|
// unreserved_subdelims_colon: single-character class of ASCII letters, digits,
// the marks - . _ ~, the sub-delimiters ! $ & ' ( ) * + , ; = and the colon.
unreserved_subdelims_colon = QStringLiteral(R"([a-zA-Z0-9\-._~!$&'()*+,;=:])"),
|
||||||
|
// pct_enc: a percent sign followed by exactly two hex digits.
pct_enc = QStringLiteral(R"((?:%[[:xdigit:]]{2}))"),
|
||||||
|
// userinfo: possibly empty run of the class above, interleaved with percent-encoded octets.
userinfo = "(?:" + unreserved_subdelims_colon + "*(?:" + pct_enc + unreserved_subdelims_colon + "*)*)",
|
||||||
|
// dec_octet: decimal 0 to 255 without leading zeros.
dec_octet = QStringLiteral(R"((?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))"),
|
||||||
|
// ipv4_addr: four dec_octet values separated by dots.
ipv4_addr = "(?:" + dec_octet + R"((?:\.)" + dec_octet + "){3})",
|
||||||
|
// h16: one to four hex digits (one 16-bit group of an IPv6 address).
h16 = QStringLiteral(R"((?:[[:xdigit:]]{1,4}))"),
|
||||||
|
// ls32: either two h16 groups joined by a colon or an ipv4_addr.
ls32 = "(?:" + h16 + ":" + h16 + "|" + ipv4_addr + ")",
|
||||||
|
// ipv6_addr: nine alternatives, one without a double colon and one for each
// number of groups that may follow it; the alternatives follow the shape of
// the IPv6address production in RFC 3986.
ipv6_addr = "(?:"
|
||||||
|
"(?:" + h16 + ":){6}" + ls32
|
||||||
|
+ "|" "::(?:" + h16 + ":){5}" + ls32
|
||||||
|
+ "|" + h16 + "?::(?:" + h16 + ":){4}" + ls32
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,1})?::(?:" + h16 + ":){3}" + ls32
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,2})?::(?:" + h16 + ":){2}" + ls32
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,3})?::" + h16 + ":" + ls32
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,4})?::" + ls32
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,5})?::" + h16
|
||||||
|
+ "|" "(?:" + h16 + "(?::" + h16 + "){0,6})?::"
|
||||||
|
")",
|
||||||
|
// ipvfuture: the letter v, one or more hex digits, a dot, then one or more
// characters of unreserved_subdelims_colon.
ipvfuture = R"((?:v[[:xdigit:]]+\.)" + unreserved_subdelims_colon + "+)",
|
||||||
|
// ip_literal: an ipv6_addr or ipvfuture enclosed in square brackets.
ip_literal = R"((?:\[(?:)" + ipv6_addr + "|" + ipvfuture + R"()\]))",
|
||||||
|
// host_alnum: one ASCII letter or digit, or one general_unicode character.
host_alnum = "(?:[a-zA-Z0-9]|" + general_unicode + ")",
|
||||||
|
// host_label: runs of host_alnum joined by one or more hyphens, so a label
// can neither start nor end with a hyphen.
host_label = "(?:" + host_alnum + "+(?:-+" + host_alnum + "+)*)",
|
||||||
|
// hostname: dot-separated host_labels with an optional trailing dot.
hostname = "(?:" + host_label + R"((?:\.)" + host_label + R"()*\.?))",
|
||||||
|
// host: a hostname or a bracketed ip_literal. A bare ipv4_addr is not listed
// separately; its digits and dots already satisfy the hostname pattern.
host = "(?:" + hostname + "|" + ip_literal + ")",
|
||||||
|
// path: a slash followed by a capture group of path characters. Round brackets
// are not in the character class; they are only accepted as a balanced pair
// whose content is matched by recursing into that same group via (?-1), which
// is what allows arbitrarily nested brackets.
path = R"((?:/((?:[a-zA-Z0-9\-._~!$&'*+,;=:@/]|)" + pct_enc + R"(|\((?-1)\)|)" + general_unicode + ")*))",
|
||||||
|
// query: same idea as path, with the question mark, backslash and curly braces
// added to the character class and balanced square brackets accepted as well.
query = R"(((?:[a-zA-Z0-9\-._~!$&'*+,;=:@/?\\{}]|)" + pct_enc + R"(|\((?-1)\)|\[(?-1)\]|)" + general_unicode + ")*)",
|
||||||
|
// fragment: reuses the query pattern unchanged.
fragment = query;
|
||||||
|
return
|
||||||
|
// Lookbehind: the URL must not directly follow a quote or a word character.
// Then an atomic group opens, and inside it capture group 1.
R"((?<!["'\w])(?>()"
|
||||||
|
+ protocol + "://"
|
||||||
|
// Optional userinfo terminated by an at sign.
+ "(?:" + userinfo + "@)?"
|
||||||
|
// Host followed by an optional colon and decimal port.
+ host + "(?::[0-9]+)?"
|
||||||
|
// Path, query and fragment are each optional.
+ path + "?"
|
||||||
|
R"((?:\?)" + query + ")?"
|
||||||
|
R"((?:#)" + fragment + ")?"
|
||||||
|
// Lookbehind evaluated while still inside the atomic group: the match may not
// end in one of . ! ? , ; : or an apostrophe, so the engine backs off those
// characters (presumably to keep trailing sentence punctuation out of the link).
"(?<![.!?,;:'])"
|
||||||
|
// Close capture group 1 and the atomic group; the URL must not be followed by a quote.
R"())(?!["']))";
|
||||||
|
}(),
|
||||||
|
// Pattern option handed to the QRegularExpression constructor.
// NOTE(review): see the Qt documentation for how it affects \w, \s and the POSIX classes used above.
QRegularExpression::UseUnicodePropertiesOption
|
||||||
|
);
|
||||||
// A matrix link to be converted back to markdown
|
// A matrix link to be converted back to markdown
// Matches an HTML anchor whose href begins with https://matrix.to/#/ ;
// capture group 1 is the href value and capture group 2 is the anchor text,
// both matched non-greedily.
|
||||||
static const QRegularExpression
|
static const QRegularExpression
|
||||||
matrixToLink(QStringLiteral(R"(<a href=\"(https://matrix.to/#/.*?)\">(.*?)</a>)"));
|
matrixToLink(QStringLiteral(R"(<a href=\"(https://matrix.to/#/.*?)\">(.*?)</a>)"));
|
||||||
|
|
Loading…
Reference in a new issue