readingtime.go 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. // Package readingtime provides a function to estimate the reading time of an article.
  4. package readingtime
  5. import (
  6. "math"
  7. "strings"
  8. "unicode"
  9. "unicode/utf8"
  10. "miniflux.app/v2/internal/reader/sanitizer"
  11. )
  12. // EstimateReadingTime returns the estimated reading time of an article in minute.
  13. func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
  14. sanitizedContent := sanitizer.StripTags(content)
  15. truncationPoint := min(len(sanitizedContent), 50)
  16. if isCJK(sanitizedContent[:truncationPoint]) {
  17. return int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
  18. }
  19. return int(math.Ceil(float64(len(strings.Fields(sanitizedContent))) / float64(defaultReadingSpeed)))
  20. }
  21. func isCJK(text string) bool {
  22. totalCJK := 0
  23. for _, r := range text[:min(len(text), 50)] {
  24. if unicode.Is(unicode.Han, r) ||
  25. unicode.Is(unicode.Hangul, r) ||
  26. unicode.Is(unicode.Hiragana, r) ||
  27. unicode.Is(unicode.Katakana, r) ||
  28. unicode.Is(unicode.Yi, r) ||
  29. unicode.Is(unicode.Bopomofo, r) {
  30. totalCJK++
  31. }
  32. }
  33. // if at least 50% of the text is CJK, odds are that the text is in CJK.
  34. return totalCJK > len(text)/50
  35. }