Просмотр исходного кода

feat(reader): split common sorting routine into separate function

Instead of creating unsorted slice and then sorting, just make slice sorted.
gudvinr 3 недель назад
Родитель
Commit
684ea3d224

+ 1 - 0
internal/reader/atom/atom_03_adapter.go

@@ -5,6 +5,7 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
 
 import (
 	"log/slog"
+	"strings"
 	"time"
 
 	"miniflux.app/v2/internal/crypto"

+ 4 - 11
internal/reader/atom/atom_10_adapter.go

@@ -5,8 +5,6 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
 
 import (
 	"log/slog"
-	"slices"
-	"sort"
 	"strconv"
 	"strings"
 	"time"
@@ -110,8 +108,7 @@ func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
 		if len(authors) == 0 {
 			authors = a.atomFeed.Authors.personNames()
 		}
-		sort.Strings(authors)
-		authors = slices.Compact(authors)
+
 		entry.Author = strings.Join(authors, ", ")
 
 		// Populate the entry date.
@@ -139,15 +136,11 @@ func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
 		}
 
 		// Populate categories.
-		categories := atomEntry.Categories.CategoryNames()
-		if len(categories) == 0 {
-			categories = a.atomFeed.Categories.CategoryNames()
+		entry.Tags = atomEntry.Categories.CategoryNames()
+		if len(entry.Tags) == 0 {
+			entry.Tags = a.atomFeed.Categories.CategoryNames()
 		}
 
-		// Sort and deduplicate categories.
-		sort.Strings(categories)
-		entry.Tags = slices.Compact(categories)
-
 		// Populate the commentsURL if defined.
 		// See https://tools.ietf.org/html/rfc4685#section-4
 		// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".

+ 39 - 24
internal/reader/atom/atom_common.go

@@ -4,6 +4,8 @@
 package atom // import "miniflux.app/v2/internal/reader/atom"
 
 import (
+	"cmp"
+	"slices"
 	"strings"
 )
 
@@ -32,19 +34,9 @@ func (a *AtomPerson) PersonName() string {
 
 type atomPersons []*AtomPerson
 
+// personNames returns sorted and deduplicated author names.
 func (a atomPersons) personNames() []string {
-	names := make([]string, 0, len(a))
-	authorNamesMap := make(map[string]bool, len(a))
-
-	for _, person := range a {
-		personName := person.PersonName()
-		if _, ok := authorNamesMap[personName]; !ok {
-			names = append(names, personName)
-			authorNamesMap[personName] = true
-		}
-	}
-
-	return names
+	return makeSorted((*AtomPerson).PersonName, a)
 }
 
 // Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7
@@ -134,22 +126,45 @@ type atomCategory struct {
 	Label string `xml:"label,attr"`
 }
 
+// name returns the display name of the category: the label with surrounding
+// whitespace trimmed or, when that is empty, the trimmed term. It returns ""
+// when both are blank.
+func (ac atomCategory) name() string {
+	if label := strings.TrimSpace(ac.Label); label != "" {
+		return label
+	}
+
+	// A blank term trims to "", which is also the "no name" result.
+	return strings.TrimSpace(ac.Term)
+}
+
 type atomCategories []atomCategory
 
+// CategoryNames returns sorted and deduplicated category names.
 func (ac atomCategories) CategoryNames() []string {
-	categories := make([]string, 0, len(ac))
-
-	for _, category := range ac {
-		label := strings.TrimSpace(category.Label)
-		if label != "" {
-			categories = append(categories, label)
-		} else {
-			term := strings.TrimSpace(category.Term)
-			if term != "" {
-				categories = append(categories, term)
-			}
+	return makeSorted(atomCategory.name, ac)
+}
+
+func makeSorted[I any, O cmp.Ordered](fn func(I) O, values []I) []O {
+	var zero O
+
+	sorted := make([]O, 0, len(values))
+	for _, in := range values {
+		out := fn(in)
+		if out == zero {
+			continue
 		}
+
+		where, found := slices.BinarySearch(sorted, out)
+		if found {
+			continue
+		}
+
+		// Insert sorted to avoid duplicates.
+		sorted = slices.Insert(sorted, where, out)
 	}
 
-	return categories
+	return sorted
 }

+ 28 - 8
internal/reader/itunes/itunes.go

@@ -3,7 +3,10 @@
 
 package itunes // import "miniflux.app/v2/internal/reader/itunes"
 
-import "strings"
+import (
+	"iter"
+	"strings"
+)
 
 // Specs: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
 type ItunesChannelElement struct {
@@ -22,15 +25,16 @@ type ItunesChannelElement struct {
 	ItunesType       string                  `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
 }
 
-func (i *ItunesChannelElement) GetItunesCategories() []string {
-	categories := make([]string, 0, len(i.ItunesCategories))
-	for _, category := range i.ItunesCategories {
-		categories = append(categories, category.Text)
-		if category.SubCategory != nil {
-			categories = append(categories, category.SubCategory.Text)
+func (i *ItunesChannelElement) ItunesCategoriesSeq() iter.Seq[string] {
+	return func(yield func(string) bool) {
+		for _, category := range i.ItunesCategories {
+			for text := range category.All() {
+				if !yield(text) {
+					return
+				}
+			}
 		}
 	}
-	return categories
 }
 
 type ItunesItemElement struct {
@@ -56,6 +60,22 @@ type ItunesCategoryElement struct {
 	SubCategory *ItunesCategoryElement `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd category"`
 }
 
+// All returns an iterator over the category names of cat and of every nested
+// [ItunesCategoryElement.SubCategory], outermost first.
+//
+// Names are trimmed with [strings.TrimSpace] and blank names are skipped.
+// A nil receiver yields nothing. The returned sequence can be ranged over
+// more than once: each pass starts again from cat.
+func (cat *ItunesCategoryElement) All() iter.Seq[string] {
+	return func(yield func(string) bool) {
+		// Walk the chain with a cursor local to this pass. Advancing the captured
+		// receiver instead would leave it at nil (or mid-chain after an early
+		// stop), so a second range over the same sequence would not restart.
+		for elem := cat; elem != nil; elem = elem.SubCategory {
+			text := strings.TrimSpace(elem.Text)
+			if text == "" {
+				continue
+			}
+
+			if !yield(text) {
+				return
+			}
+		}
+	}
+}
+
 type ItunesOwnerElement struct {
 	Name  string `xml:"name"`
 	Email string `xml:"email"`

+ 34 - 23
internal/reader/json/adapter.go

@@ -4,6 +4,7 @@
 package json // import "miniflux.app/v2/internal/reader/json"
 
 import (
+	"cmp"
 	"log/slog"
 	"slices"
 	"strings"
@@ -134,20 +135,12 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 		}
 
 		// Populate the entry author.
-		itemAuthors := j.jsonFeed.Authors
-		itemAuthors = append(itemAuthors, item.Authors...)
-		itemAuthors = append(itemAuthors, item.Author, j.jsonFeed.Author)
-
-		var authorNames = make([]string, 0, len(itemAuthors))
-		for _, author := range itemAuthors {
-			authorName := strings.TrimSpace(author.Name)
-			if authorName != "" {
-				authorNames = append(authorNames, authorName)
-			}
-		}
+		authorNames := make([]string, 0, len(j.jsonFeed.Authors)+len(item.Authors)+1+1)
+
+		authorNames = appendSorted(authorNames, JSONAuthor.name, j.jsonFeed.Authors...)
+		authorNames = appendSorted(authorNames, JSONAuthor.name, item.Authors...)
+		authorNames = appendSorted(authorNames, JSONAuthor.name, item.Author, j.jsonFeed.Author)
 
-		slices.Sort(authorNames)
-		authorNames = slices.Compact(authorNames)
 		entry.Author = strings.Join(authorNames, ", ")
 
 		// Populate the entry enclosures.
@@ -175,16 +168,8 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 		}
 
 		// Populate the entry tags.
-		for _, tag := range item.Tags {
-			tag = strings.TrimSpace(tag)
-			if tag != "" {
-				entry.Tags = append(entry.Tags, tag)
-			}
-		}
-
-		// Sort and deduplicate tags.
-		slices.Sort(entry.Tags)
-		entry.Tags = slices.Compact(entry.Tags)
+		entry.Tags = make([]string, 0, len(item.Tags))
+		entry.Tags = appendSorted(entry.Tags, strings.TrimSpace, item.Tags...)
 
 		// Generate a hash for the entry.
 		for _, value := range []string{item.ID, item.URL, item.ExternalURL, item.ContentText + item.ContentHTML + item.Summary} {
@@ -200,3 +185,29 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 
 	return feed
 }
+
+// appendSorted applies "fn" to every element of "values" and inserts the results into "sorted".
+//   - "sorted" must already be in ascending order; insertion keeps it that way
+//   - results equal to the zero value of O (e.g. "" for strings) are skipped
+//   - results already present in "sorted" are not inserted again
+func appendSorted[I any, O cmp.Ordered](sorted []O, fn func(I) O, values ...I) []O {
+	var zero O
+
+	// Reserve room for the worst case where every value is new.
+	sorted = slices.Grow(sorted, len(values))
+	for _, in := range values {
+		out := fn(in)
+		if out == zero {
+			continue
+		}
+
+		// The binary search both detects duplicates and finds the insertion point.
+		where, found := slices.BinarySearch(sorted, out)
+		if found {
+			continue
+		}
+
+		sorted = slices.Insert(sorted, where, out)
+	}
+
+	return sorted
+}

+ 8 - 1
internal/reader/json/json.go

@@ -3,7 +3,10 @@
 
 package json // import "miniflux.app/v2/internal/reader/json"
 
-import "encoding/json"
+import (
+	"encoding/json"
+	"strings"
+)
 
 // JSON Feed specs:
 // https://www.jsonfeed.org/version/1.1/
@@ -64,6 +67,10 @@ type JSONAuthor struct {
 	AvatarURL string `json:"avatar"`
 }
 
+// name returns the author's name with leading and trailing whitespace removed.
+func (a JSONAuthor) name() string {
+	trimmed := strings.TrimSpace(a.Name)
+
+	return trimmed
+}
+
 // JSONAuthors unmarshals either an array or a single author object.
 // Some feeds incorrectly use an object for "authors"; we accept it to avoid failing the whole feed.
 type JSONAuthors []JSONAuthor

+ 8 - 7
internal/reader/media/media.go

@@ -4,6 +4,7 @@
 package media // import "miniflux.app/v2/internal/reader/media"
 
 import (
+	"iter"
 	"regexp"
 	"strconv"
 	"strings"
@@ -174,15 +175,15 @@ func (dl DescriptionList) First() string {
 
 type MediaCategoryList []MediaCategory
 
-func (mcl MediaCategoryList) Labels() []string {
-	var labels []string
-	for _, category := range mcl {
-		label := strings.TrimSpace(category.Label)
-		if label != "" {
-			labels = append(labels, label)
+func (mcl MediaCategoryList) LabelsSeq() iter.Seq[string] {
+	return func(yield func(string) bool) {
+		for _, category := range mcl {
+			label := strings.TrimSpace(category.Label)
+			if !yield(label) {
+				return
+			}
 		}
 	}
-	return labels
 }
 
 type MediaCategory struct {

+ 40 - 36
internal/reader/rss/adapter.go

@@ -4,7 +4,9 @@
 package rss // import "miniflux.app/v2/internal/reader/rss"
 
 import (
+	"cmp"
 	"html"
+	"iter"
 	"log/slog"
 	"path"
 	"slices"
@@ -153,9 +155,6 @@ func (r *rssAdapter) buildFeed(baseURL string) *model.Feed {
 		if len(entry.Tags) == 0 {
 			entry.Tags = findFeedTags(&r.rss.Channel)
 		}
-		// Sort and deduplicate tags.
-		slices.Sort(entry.Tags)
-		entry.Tags = slices.Compact(entry.Tags)
 
 		feed.Entries = append(feed.Entries, entry)
 	}
@@ -184,26 +183,12 @@ func findFeedAuthor(rssChannel *rssChannel) string {
 }
 
 func findFeedTags(rssChannel *rssChannel) []string {
-	itunesCategories := rssChannel.GetItunesCategories()
-	tags := make([]string, 0, len(rssChannel.Categories)+len(itunesCategories)+1)
+	tags := make([]string, 0, len(rssChannel.Categories)+2*len(rssChannel.ItunesCategories)+1)
 
-	for _, tag := range rssChannel.Categories {
-		tag = strings.TrimSpace(tag)
-		if tag != "" {
-			tags = append(tags, tag)
-		}
-	}
-
-	for _, tag := range itunesCategories {
-		tag = strings.TrimSpace(tag)
-		if tag != "" {
-			tags = append(tags, tag)
-		}
-	}
+	tags = appendSorted(tags, strings.TrimSpace, rssChannel.Categories...)
+	tags = appendSortedSeq(tags, strings.TrimSpace, rssChannel.ItunesCategoriesSeq())
 
-	if tag := strings.TrimSpace(rssChannel.GooglePlayCategory.Text); tag != "" {
-		tags = append(tags, tag)
-	}
+	tags = appendSorted(tags, strings.TrimSpace, rssChannel.GooglePlayCategory.Text)
 
 	return tags
 }
@@ -303,22 +288,10 @@ func findEntryAuthor(rssItem *rssItem) string {
 }
 
 func findEntryTags(rssItem *rssItem) []string {
-	mediaLabels := rssItem.MediaCategories.Labels()
-	tags := make([]string, 0, len(rssItem.Categories)+len(mediaLabels))
+	tags := make([]string, 0, len(rssItem.Categories)+len(rssItem.MediaCategories))
 
-	for _, tag := range rssItem.Categories {
-		tag = strings.TrimSpace(tag)
-		if tag != "" {
-			tags = append(tags, tag)
-		}
-	}
-
-	for _, tag := range mediaLabels {
-		tag = strings.TrimSpace(tag)
-		if tag != "" {
-			tags = append(tags, tag)
-		}
-	}
+	tags = appendSorted(tags, strings.TrimSpace, rssItem.Categories...)
+	tags = appendSortedSeq(tags, strings.TrimSpace, rssItem.MediaCategories.LabelsSeq())
 
 	return tags
 }
@@ -452,3 +425,34 @@ func findEntryEnclosures(rssItem *rssItem, siteURL string) model.EnclosureList {
 
 	return enclosures
 }
+
+// appendSorted is the variadic counterpart of [appendSortedSeq]: it takes the
+// values as arguments rather than as an [iter.Seq].
+func appendSorted[I any, O cmp.Ordered](sorted []O, fn func(I) O, values ...I) []O {
+	// Reserve capacity up front for the case where no value is skipped.
+	grown := slices.Grow(sorted, len(values))
+
+	return appendSortedSeq(grown, fn, slices.Values(values))
+}
+
+// appendSortedSeq merges the elements produced by the "values" iterator into "sorted".
+//   - every element is first mapped through "fn"
+//   - mapped results equal to the zero value of O are skipped
+//   - a result is inserted at its ordered position, so "sorted" stays sorted
+//   - a result that is already present is not inserted again
+func appendSortedSeq[I any, O cmp.Ordered](sorted []O, fn func(I) O, values iter.Seq[I]) []O {
+	var empty O
+
+	for value := range values {
+		mapped := fn(value)
+		if mapped != empty {
+			// One binary search answers both "is it there?" and "where does it go?".
+			if pos, exists := slices.BinarySearch(sorted, mapped); !exists {
+				sorted = slices.Insert(sorted, pos, mapped)
+			}
+		}
+	}
+
+	return sorted
+}