readingtime.go 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // Package readingtime provides a function to estimate the reading time of an article.
  4. package readingtime
  5. import (
  6. "math"
  7. "strings"
  8. "unicode"
  9. "unicode/utf8"
  10. "miniflux.app/v2/internal/reader/sanitizer"
  11. )
  12. // EstimateReadingTime returns the estimated reading time of an article in minute.
  13. func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
  14. sanitizedContent := sanitizer.StripTags(content)
  15. truncationPoint := min(len(sanitizedContent), 50)
  16. if isCJK(sanitizedContent[:truncationPoint]) {
  17. return int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
  18. }
  19. return int(math.Ceil(float64(countWords(sanitizedContent)) / float64(defaultReadingSpeed)))
  20. }
  21. func countWords(s string) int {
  22. n := 0
  23. for range strings.FieldsSeq(s) {
  24. n++
  25. }
  26. return n
  27. }
  // isCJK reports whether text appears to be written in a CJK script.
  //
  // Only the first 50 bytes of text are sampled. Within that sample, a rune is
  // counted as CJK when it belongs to the Han, Hangul, Hiragana, Katakana, Yi or
  // Bopomofo script. The text is considered CJK when at least half of the
  // sampled runes are CJK. An empty sample is never CJK.
  func isCJK(text string) bool {
  	const sampleBytes = 50

  	totalRunes, totalCJK := 0, 0
  	for _, r := range text[:min(len(text), sampleBytes)] {
  		// Cutting the sample at a byte offset can split a multi-byte rune; the
  		// leftover bytes decode as U+FFFD and must not dilute the ratio.
  		if r == unicode.ReplacementChar {
  			continue
  		}
  		totalRunes++
  		if unicode.In(r,
  			unicode.Han,
  			unicode.Hangul,
  			unicode.Hiragana,
  			unicode.Katakana,
  			unicode.Yi,
  			unicode.Bopomofo,
  		) {
  			totalCJK++
  		}
  	}

  	// If at least 50% of the sampled runes are CJK, odds are that the text is
  	// in CJK. Compare against the rune count, not the byte length.
  	return totalRunes > 0 && totalCJK*2 >= totalRunes
  }