Просмотр исходного кода

Minor internal/reader/readability/readability.go speedup

- Don't use a capturing group in `divToPElementsRegexp`
- Remove a duplicate condition
- Replace a regex with a fixed-comparison and a `Contains`
jvoisin 2 года назад
Родитель
Коммит
4db138d4b8
1 изменённый файл: 6 добавлено и 5 удалено
  1. 6 5
      internal/reader/readability/readability.go

+ 6 - 5
internal/reader/readability/readability.go

@@ -21,8 +21,7 @@ const (
 )
 
 var (
-	divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
-	sentenceRegexp       = regexp.MustCompile(`\.( |$)`)
+	divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
 
 	blacklistCandidatesRegexp  = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
 	okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
@@ -114,9 +113,11 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
 			content := s.Text()
 			contentLength := len(content)
 
-			if contentLength >= 80 && linkDensity < .25 {
-				append = true
-			} else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
+			if contentLength >= 80 {
+				if linkDensity < .25 {
+					append = true
+				}
+			} else if linkDensity == 0 && (content[len(content)-1] == '.' || strings.Contains(content, ". ")) {
 				append = true
 			}
 		}