11 luni în urmă · 8a98926674
--- a/internal/reader/readability/readability.go
+++ b/internal/reader/readability/readability.go
@@ -107,6 +107,28 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
 
				 	return baseURL, extractedContent, nil
			
 
				 }
			
 
				 
			
 
				+func getSelectionLength(s *goquery.Selection) int {
			
 
				+	var getLengthOfTextContent func(*html.Node) int
			
 
				+	getLengthOfTextContent = func(n *html.Node) int {
			
 
				+		total := 0
			
 
				+		if n.Type == html.TextNode {
			
 
				+			total += len(n.Data)
			
 
				+		}
			
 
				+		if n.FirstChild != nil {
			
 
				+			for c := n.FirstChild; c != nil; c = c.NextSibling {
			
 
				+				total += getLengthOfTextContent(c)
			
 
				+			}
			
 
				+		}
			
 
				+		return total
			
 
				+	}
			
 
				+
			
 
				+	sum := 0
			
 
				+	for _, n := range s.Nodes {
			
 
				+		sum += getLengthOfTextContent(n)
			
 
				+	}
			
 
				+	return sum
			
 
				+}
			
 
				+
			
 
				 // Now that we have the top candidate, look through its siblings for content that might also be related.
			
 
				 // Things like preambles, content split by ads that we removed, etc.
			
 
				 func getArticle(topCandidate *candidate, candidates candidateList) string {
			
@@ -127,8 +149,7 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
 
				 		} else if s.Is("p") {
			
 
				 			tag = node.Data
			
 
				 			linkDensity := getLinkDensity(s)
			
 
				-			content := s.Text()
			
 
				-			contentLength := len(content)
			
 
				+			contentLength := getSelectionLength(s)
			
 
				 
			
 
				 			if contentLength >= 80 {
			
 
				 				if linkDensity < .25 {
			
@@ -136,6 +157,8 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
 
				 				}
			
 
				 			} else {
			
 
				 				if linkDensity == 0 {
			
 
				+					// It's a small selection, so calling .Text() doesn't impact performance too much.
			
 
				+					content := s.Text()
			
 
				 					if containsSentence(content) {
			
 
				 						append = true
			
 
				 					}
			
@@ -223,10 +246,10 @@ func getCandidates(document *goquery.Document) candidateList {
 
				 	candidates := make(candidateList)
			
 
				 
			
 
				 	document.Find(defaultTagsToScore).Each(func(i int, s *goquery.Selection) {
			
 
				-		text := s.Text()
			
 
				+		textLen := getSelectionLength(s)
			
 
				 
			
 
				 		// If this paragraph is less than 25 characters, don't even count it.
			
 
				-		if len(text) < 25 {
			
 
				+		if textLen < 25 {
			
 
				 			return
			
 
				 		}
			
 
				 
			
@@ -253,10 +276,11 @@ func getCandidates(document *goquery.Document) candidateList {
 
				 		contentScore := float32(1.0)
			
 
				 
			
 
				 		// Add points for any commas within this paragraph.
			
 
				+		text := s.Text()
			
 
				 		contentScore += float32(strings.Count(text, ",") + 1)
			
 
				 
			
 
				 		// For every 100 characters in this paragraph, add another point. Up to 3 points.
			
 
				-		contentScore += float32(min(len(text)/100.0, 3))
			
 
				+		contentScore += float32(min(textLen/100.0, 3))
			
 
				 
			
 
				 		candidates[parentNode].score += contentScore
			
 
				 		if grandParentNode != nil {