Resolve relative URLs within RSS article description

PR #21943.


---------

Co-authored-by: Vladimir Golovnev <glassez@yandex.ru>
This commit is contained in:
Zentino 2024-12-20 15:19:34 +08:00 committed by GitHub
parent 34c8849f22
commit 257d928ab3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,6 +54,48 @@
#include "feedlistwidget.h"
#include "ui_rsswidget.h"
namespace
{
void convertRelativeUrlToAbsolute(QString &html, const QString &baseUrl)
{
const QRegularExpression rx {uR"(((<a\s+[^>]*?href|<img\s+[^>]*?src)\s*=\s*["'])((https?|ftp):)?(\/\/[^\/]*)?(\/?[^\/"].*?)(["']))"_s
, QRegularExpression::CaseInsensitiveOption};
const QString normalizedBaseUrl = baseUrl.endsWith(u'/') ? baseUrl : (baseUrl + u'/');
const QUrl url {normalizedBaseUrl};
const QString defaultScheme = url.scheme();
QRegularExpressionMatchIterator iter = rx.globalMatch(html);
while (iter.hasNext())
{
const QRegularExpressionMatch match = iter.next();
const QString scheme = match.captured(4);
const QString host = match.captured(5);
if (!scheme.isEmpty())
{
if (host.isEmpty())
break; // invalid URL, should never happen
// already absolute URL
continue;
}
QString relativePath = match.captured(6);
if (relativePath.startsWith(u'/'))
relativePath = relativePath.mid(1);
const QString absoluteUrl = !host.isEmpty()
? QString(defaultScheme + u':' + host) : (normalizedBaseUrl + relativePath);
const QString fullMatch = match.captured(0);
const QString prefix = match.captured(1);
const QString suffix = match.captured(7);
html.replace(fullMatch, (prefix + absoluteUrl + suffix));
}
}
}
RSSWidget::RSSWidget(IGUIApplication *app, QWidget *parent)
: GUIApplicationComponent(app, parent)
, m_ui {new Ui::RSSWidget}
@ -605,6 +647,11 @@ void RSSWidget::renderArticle(const RSS::Article *article) const
html += u"<pre>" + description + u"</pre>";
}
// Supplement relative URLs to absolute ones
const QUrl url {article->link()};
const QString baseUrl = url.toString(QUrl::RemovePath | QUrl::RemoveQuery);
convertRelativeUrlToAbsolute(html, baseUrl);
html += u"</div>";
m_ui->textBrowser->setHtml(html);
}