readingtime.go 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // Package readingtime provides a function to estimate the reading time of an article.
  4. package readingtime
  5. import (
  6. "math"
  7. "strings"
  8. "unicode/utf8"
  9. "miniflux.app/v2/internal/reader/sanitizer"
  10. "github.com/abadojack/whatlanggo"
  11. )
  12. // EstimateReadingTime returns the estimated reading time of an article in minute.
  13. func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
  14. sanitizedContent := sanitizer.StripTags(content)
  15. // Litterature on language detection says that around 100 signes is enough, we're safe here.
  16. truncationPoint := int(math.Min(float64(len(sanitizedContent)), 250))
  17. // We're only interested in identifying Japanse/Chinese/Korean
  18. options := whatlanggo.Options{
  19. Whitelist: map[whatlanggo.Lang]bool{
  20. whatlanggo.Jpn: true,
  21. whatlanggo.Cmn: true,
  22. whatlanggo.Kor: true,
  23. },
  24. }
  25. langInfo := whatlanggo.DetectWithOptions(sanitizedContent[:truncationPoint], options)
  26. if langInfo.IsReliable() {
  27. return int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
  28. }
  29. nbOfWords := len(strings.Fields(sanitizedContent))
  30. return int(math.Ceil(float64(nbOfWords) / float64(defaultReadingSpeed)))
  31. }