// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import (
"encoding/base64"
"fmt"
"html"
"log/slog"
"net/url"
"regexp"
"strconv"
"strings"
"unicode"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
nethtml "golang.org/x/net/html"
"github.com/PuerkitoBio/goquery"
)
var (
// youtubeIdRegex captures a double-quoted, 11-character video ID (letters,
// digits, "_" or "-") that follows the text `youtube_id`, an optional
// closing quote, and a ":" or "=" separator with optional whitespace.
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
// textLinkRegex matches http:// and https:// URLs case-insensitively.
// The character class for the final character leaves out ? ! : , . ; so
// a match never ends on one of those punctuation characters.
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
)
// titlelize lower-cases s and then maps to Unicode title case every character
// that either opens the string or directly follows a whitespace character.
func titlelize(s string) string {
	// strings.Map feeds the callback one rune at a time, so the callback keeps
	// a flag recording whether the rune before the current one was whitespace.
	// The flag starts out true so that the very first rune is title-cased too.
	afterSpace := true
	titleAtWordStart := func(r rune) rune {
		startsWord := afterSpace
		afterSpace = unicode.IsSpace(r)
		if startsWord {
			return unicode.ToTitle(r)
		}
		return r
	}
	return strings.Map(titleAtWordStart, strings.ToLower(s))
}
// addImageTitle parses entryContent with goquery and selects the img elements
// that carry both a src and a title attribute.
//
// NOTE(review): the lines below do not read as one intact function. The braces
// stop balancing right after the ReplaceWithHtml call, and everything from the
// first stray "break" onwards uses names that are never declared in the
// visible lines (changed, candidateSrcsetAttrs, an img selection from an outer
// scope). Text seems to have been lost here, including markup inside the
// string literals, and the tail presumably belongs to a different rewrite
// helper. Restore this span from version control before relying on it.
func addImageTitle(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
matches := doc.Find("img[src][title]")
if matches.Length() > 0 {
matches.Each(func(i int, img *goquery.Selection) {
altAttr := img.AttrOr("alt", "")
srcAttr, _ := img.Attr("src")
titleAttr, _ := img.Attr("title")
img.ReplaceWithHtml(` ` + html.EscapeString(titleAttr) + ``)
}
break
}
}
// Srcset-linked candidates
for _, candidateAttr := range candidateSrcsetAttrs {
if srcAttr, found := img.Attr(candidateAttr); found {
changed = true
if img.Is("img") {
img.SetAttr("srcset", srcAttr)
} else {
altAttr := img.AttrOr("alt", "")
img.ReplaceWithHtml(`
`)
}
break
}
}
})
// Only when nothing was rewritten above: look at each noscript element and,
// if it contains exactly one img, call Unwrap on that img.
if !changed {
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
if img := noscript.Find("img"); img.Length() == 1 {
img.Unwrap()
changed = true
}
})
}
// Serialize the body only if something changed; otherwise hand back the
// input string untouched.
if changed {
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
return entryContent
}
// addDynamicIframe copies the value of the first lazy-loading attribute found
// on each iframe into that iframe's src attribute.
//
// The input is returned untouched when it cannot be parsed or when no iframe
// carries any of the candidate attributes; otherwise the serialized body of
// the modified document is returned.
func addDynamicIframe(entryContent string) string {
	document, parseErr := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if parseErr != nil {
		return entryContent
	}

	// Checked in this order; the first attribute present on an iframe wins.
	lazySrcAttrs := []string{
		"data-src",
		"data-original",
		"data-orig",
		"data-url",
		"data-lazy-src",
	}

	modified := false
	document.Find("iframe").Each(func(_ int, frame *goquery.Selection) {
		for _, attrName := range lazySrcAttrs {
			value, present := frame.Attr(attrName)
			if !present {
				continue
			}
			modified = true
			frame.SetAttr("src", value)
			return
		}
	})

	if !modified {
		return entryContent
	}

	rendered, _ := document.FindMatcher(goquery.Single("body")).Html()
	return rendered
}
// fixMediumImages replaces every figure.paragraph-image element that contains
// a noscript element with the text content of that noscript element, passed
// to ReplaceWithHtml.
//
// The input is returned as is when it cannot be parsed. In every other case
// the serialized body of the document is returned, whether or not a figure
// was replaced.
func fixMediumImages(entryContent string) string {
	document, parseErr := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if parseErr != nil {
		return entryContent
	}

	document.Find("figure.paragraph-image").Each(func(_ int, figure *goquery.Selection) {
		fallback := figure.Find("noscript")
		if fallback.Length() == 0 {
			return
		}
		figure.ReplaceWithHtml(fallback.Text())
	})

	rendered, _ := document.FindMatcher(goquery.Single("body")).Html()
	return rendered
}
// useNoScriptImages handles figure elements that contain both an img and a
// noscript element: the noscript text is handed to PrependHtml on the figure,
// then the img elements and noscript elements found beforehand are removed.
//
// The input is returned as is when it cannot be parsed. In every other case
// the serialized body of the document is returned, whether or not a figure
// was touched.
func useNoScriptImages(entryContent string) string {
	document, parseErr := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if parseErr != nil {
		return entryContent
	}

	document.Find("figure").Each(func(_ int, figure *goquery.Selection) {
		images := figure.Find("img")
		if images.Length() == 0 {
			return
		}

		fallback := figure.Find("noscript")
		if fallback.Length() == 0 {
			return
		}

		// Both selections were taken before the prepend, so only the
		// pre-existing img and noscript nodes are removed.
		figure.PrependHtml(fallback.Text())
		images.Remove()
		fallback.Remove()
	})

	rendered, _ := document.FindMatcher(goquery.Single("body")).Html()
	return rendered
}
// getYoutubVideoIDFromURL extracts a YouTube video ID from entryURL.
//
// Two URL shapes are recognised, and only when the host is youtube.com or one
// of its subdomains:
//   - /watch?v=<id>: the value of the "v" query parameter is returned as is;
//   - /shorts/<id>: the remainder of the path is returned when it is exactly
//     11 bytes long.
//
// An empty string is returned when the URL cannot be parsed, when the host is
// not a YouTube host, or when neither shape applies.
func getYoutubVideoIDFromURL(entryURL string) string {
	u, err := url.Parse(entryURL)
	if err != nil {
		return ""
	}

	// Match on a DNS label boundary: a bare suffix test would also accept
	// look-alike hosts such as "notyoutube.com". Host names are
	// case-insensitive, so normalise the case before comparing.
	host := strings.ToLower(u.Hostname())
	if host != "youtube.com" && !strings.HasSuffix(host, ".youtube.com") {
		return ""
	}

	if u.Path == "/watch" {
		// Get yields "" when the parameter is absent, which is also the
		// "no video ID" result.
		return u.Query().Get("v")
	}

	if id, found := strings.CutPrefix(u.Path, "/shorts/"); found {
		if len(id) == 11 {
			// youtube shorts id are always 11 chars.
			return id
		}
	}

	return ""
}
// buildVideoPlayerIframe is the helper the video rewrite functions in this
// file call to obtain the player snippet they place in front of the entry
// content.
//
// NOTE(review): as written it returns an empty string and never reads
// absoluteVideoURL. The markup the name implies seems to be missing from the
// literal — confirm against version control before relying on the output.
func buildVideoPlayerIframe(absoluteVideoURL string) string {
return ``
}
// addVideoPlayerIframe returns entryContent preceded by the player snippet
// built for absoluteVideoURL, with a line break between the two.
func addVideoPlayerIframe(absoluteVideoURL, entryContent string) string {
	player := buildVideoPlayerIframe(absoluteVideoURL)
	return player + `
` + entryContent
}
// addYoutubeVideoRewriteRule prepends a video player to entryContent when a
// YouTube video ID can be extracted from entryURL. The player URL is the
// configured YouTube embed URL override followed by the video ID. Without a
// video ID, entryContent is returned unchanged.
func addYoutubeVideoRewriteRule(entryURL, entryContent string) string {
	videoID := getYoutubVideoIDFromURL(entryURL)
	if videoID == "" {
		return entryContent
	}

	embedURL := config.Opts.YouTubeEmbedUrlOverride() + videoID
	return addVideoPlayerIframe(embedURL, entryContent)
}
// addYoutubeVideoUsingInvidiousPlayer prepends a video player to entryContent
// when a YouTube video ID can be extracted from entryURL. The player URL
// points at /embed/<id> on the configured Invidious instance, always over
// https. Without a video ID, entryContent is returned unchanged.
func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
	videoID := getYoutubVideoIDFromURL(entryURL)
	if videoID == "" {
		return entryContent
	}

	embedURL := `https://` + config.Opts.InvidiousInstance() + `/embed/` + videoID
	return addVideoPlayerIframe(embedURL, entryContent)
}
// For reference: https://github.com/miniflux/v2/pull/1314
func addYoutubeVideoFromId(entryContent string) string {
matches := youtubeIdRegex.FindAllStringSubmatch(entryContent, -1)
if matches == nil {
return entryContent
}
var videoPlayerHTML strings.Builder
for _, match := range matches {
if len(match) == 2 {
videoPlayerHTML.WriteString(buildVideoPlayerIframe(config.Opts.YouTubeEmbedUrlOverride() + match[1]))
videoPlayerHTML.WriteString("
")
}
}
return videoPlayerHTML.String() + entryContent
}
// addInvidiousVideo prepends a video player to entryContent when entryURL has
// the path /watch and a non-empty "v" query parameter.
//
// The player URL is https://<hostname>/embed/<v>, followed by every other
// query parameter of entryURL re-encoded. Only the hostname of entryURL is
// reused: url.URL.Hostname strips any port, and the scheme is always https.
// In all other cases entryContent is returned unchanged.
func addInvidiousVideo(entryURL, entryContent string) string {
	parsed, err := url.Parse(entryURL)
	if err != nil || parsed.Path != "/watch" {
		return entryContent
	}

	params := parsed.Query()
	id := params.Get("v")
	if id == "" {
		return entryContent
	}

	// The video ID moves into the path; whatever is left stays in the query.
	params.Del("v")

	target := "https://" + parsed.Hostname() + `/embed/` + id
	if len(params) != 0 {
		target = target + "?" + params.Encode()
	}

	return addVideoPlayerIframe(target, entryContent)
}
// addPDFLink returns a formatted string that starts with "PDF" when entryURL
// ends in ".pdf" (case-sensitive suffix match); for any other URL it returns
// entryContent unchanged.
//
// NOTE(review): the format string holds a single %s verb but two arguments
// are passed. fmt therefore fills the verb with entryURL and reports
// entryContent through an %!(EXTRA ...) marker instead of rendering it
// cleanly. The literal looks like it lost its markup; confirm the intended
// template against version control before changing it.
func addPDFLink(entryURL, entryContent string) string {
if strings.HasSuffix(entryURL, ".pdf") {
return fmt.Sprintf(`PDF
%s`, entryURL, entryContent)
}
return entryContent
}
func addEnclosureLinks(entry *model.Entry) string {
var links strings.Builder
for _, enclosure := range entry.Enclosures {
if enclosure.URL == "" {
continue
}
enclosureURL := html.EscapeString(enclosure.URL)
links.WriteString(`