| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package atom // import "miniflux.app/v2/internal/reader/atom"
- import (
- "log/slog"
- "slices"
- "sort"
- "strconv"
- "strings"
- "time"
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
- )
- type atom10Adapter struct {
- atomFeed *atom10Feed
- }
- func NewAtom10Adapter(atomFeed *atom10Feed) *atom10Adapter {
- return &atom10Adapter{atomFeed}
- }
- func (a *atom10Adapter) BuildFeed(baseURL string) *model.Feed {
- feed := new(model.Feed)
- // Populate the feed URL.
- feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
- if feedURL != "" {
- if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
- feed.FeedURL = absoluteFeedURL
- }
- } else {
- feed.FeedURL = baseURL
- }
- // Populate the site URL.
- siteURL := a.atomFeed.Links.originalLink()
- if siteURL != "" {
- if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
- feed.SiteURL = absoluteSiteURL
- }
- } else {
- feed.SiteURL = baseURL
- }
- // Populate the feed title.
- feed.Title = a.atomFeed.Title.body()
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
- // Populate the feed description.
- feed.Description = a.atomFeed.Subtitle.body()
- // Populate the feed icon.
- if a.atomFeed.Icon != "" {
- if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Icon); err == nil {
- feed.IconURL = absoluteIconURL
- }
- } else if a.atomFeed.Logo != "" {
- if absoluteLogoURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Logo); err == nil {
- feed.IconURL = absoluteLogoURL
- }
- }
- feed.Entries = a.populateEntries(feed.SiteURL)
- return feed
- }
- func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
- entries := make(model.Entries, 0, len(a.atomFeed.Entries))
- for _, atomEntry := range a.atomFeed.Entries {
- entry := model.NewEntry()
- // Populate the entry URL.
- entry.URL = atomEntry.Links.originalLink()
- if entry.URL != "" {
- if absoluteEntryURL, err := urllib.AbsoluteURL(siteURL, entry.URL); err == nil {
- entry.URL = absoluteEntryURL
- }
- }
- // Populate the entry content.
- entry.Content = atomEntry.Content.body()
- if entry.Content == "" {
- entry.Content = atomEntry.Summary.body()
- if entry.Content == "" {
- entry.Content = atomEntry.FirstMediaDescription()
- }
- }
- // Populate the entry title.
- entry.Title = atomEntry.Title.title()
- if entry.Title == "" {
- entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
- if entry.Title == "" {
- entry.Title = entry.URL
- }
- }
- // Populate the entry author.
- authors := atomEntry.Authors.personNames()
- if len(authors) == 0 {
- authors = a.atomFeed.Authors.personNames()
- }
- sort.Strings(authors)
- authors = slices.Compact(authors)
- entry.Author = strings.Join(authors, ", ")
- // Populate the entry date.
- for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
- if value != "" {
- if parsedDate, err := date.Parse(value); err != nil {
- slog.Debug("Unable to parse date from Atom 1.0 feed",
- slog.String("date", value),
- slog.String("url", entry.URL),
- slog.Any("error", err),
- )
- } else {
- entry.Date = parsedDate
- break
- }
- }
- }
- if entry.Date.IsZero() {
- entry.Date = time.Now()
- }
- // Populate categories.
- categories := atomEntry.Categories.CategoryNames()
- if len(categories) == 0 {
- categories = a.atomFeed.Categories.CategoryNames()
- }
- // Sort and deduplicate categories.
- sort.Strings(categories)
- entry.Tags = slices.Compact(categories)
- // Populate the commentsURL if defined.
- // See https://tools.ietf.org/html/rfc4685#section-4
- // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
- // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
- commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
- if urllib.IsAbsoluteURL(commentsURL) {
- entry.CommentsURL = commentsURL
- }
- // Generate the entry hash.
- for _, value := range []string{atomEntry.ID, atomEntry.Links.originalLink()} {
- if value != "" {
- entry.Hash = crypto.SHA256(value)
- break
- }
- }
- // Populate the entry enclosures.
- uniqueEnclosuresMap := make(map[string]bool)
- for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
- mediaURL := strings.TrimSpace(mediaThumbnail.URL)
- if mediaURL == "" {
- continue
- }
- if _, found := uniqueEnclosuresMap[mediaURL]; !found {
- if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
- slog.Debug("Unable to build absolute URL for media thumbnail",
- slog.String("url", mediaThumbnail.URL),
- slog.String("site_url", siteURL),
- slog.Any("error", err),
- )
- } else {
- uniqueEnclosuresMap[mediaAbsoluteURL] = true
- entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
- URL: mediaAbsoluteURL,
- MimeType: mediaThumbnail.MimeType(),
- Size: mediaThumbnail.Size(),
- })
- }
- }
- }
- for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
- absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, link.Href)
- if err != nil {
- slog.Debug("Unable to resolve absolute URL for enclosure",
- slog.String("enclosure_url", link.Href),
- slog.String("entry_url", entry.URL),
- slog.Any("error", err),
- )
- } else {
- if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; !found {
- uniqueEnclosuresMap[absoluteEnclosureURL] = true
- length, _ := strconv.ParseInt(link.Length, 10, 0)
- entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
- URL: absoluteEnclosureURL,
- MimeType: link.Type,
- Size: length,
- })
- }
- }
- }
- for _, mediaContent := range atomEntry.AllMediaContents() {
- mediaURL := strings.TrimSpace(mediaContent.URL)
- if mediaURL == "" {
- continue
- }
- if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
- slog.Debug("Unable to build absolute URL for media content",
- slog.String("url", mediaContent.URL),
- slog.String("site_url", siteURL),
- slog.Any("error", err),
- )
- } else {
- if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
- uniqueEnclosuresMap[mediaAbsoluteURL] = true
- entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
- URL: mediaAbsoluteURL,
- MimeType: mediaContent.MimeType(),
- Size: mediaContent.Size(),
- })
- }
- }
- }
- for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
- mediaURL := strings.TrimSpace(mediaPeerLink.URL)
- if mediaURL == "" {
- continue
- }
- if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
- slog.Debug("Unable to build absolute URL for media peer link",
- slog.String("url", mediaPeerLink.URL),
- slog.String("site_url", siteURL),
- slog.Any("error", err),
- )
- } else {
- if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
- uniqueEnclosuresMap[mediaAbsoluteURL] = true
- entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
- URL: mediaAbsoluteURL,
- MimeType: mediaPeerLink.MimeType(),
- Size: mediaPeerLink.Size(),
- })
- }
- }
- }
- entries = append(entries, entry)
- }
- return entries
- }
|