|
|
@@ -42,8 +42,9 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|
|
|
|
|
slog.Debug("Processing entry",
|
|
|
slog.Int64("user_id", user.ID),
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("entry_url", entry.URL),
|
|
|
+ slog.String("entry_hash", entry.Hash),
|
|
|
+ slog.String("entry_title", entry.Title),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
slog.String("feed_url", feed.FeedURL),
|
|
|
)
|
|
|
@@ -52,14 +53,18 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|
|
}
|
|
|
|
|
|
websiteURL := getUrlFromEntry(feed, entry)
|
|
|
- entryIsNew := !store.EntryURLExists(feed.ID, entry.URL)
|
|
|
+ entryIsNew := store.IsNewEntry(feed.ID, entry.Hash)
|
|
|
if feed.Crawler && (entryIsNew || forceRefresh) {
|
|
|
slog.Debug("Scraping entry",
|
|
|
slog.Int64("user_id", user.ID),
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("entry_url", entry.URL),
|
|
|
+ slog.String("entry_hash", entry.Hash),
|
|
|
+ slog.String("entry_title", entry.Title),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
slog.String("feed_url", feed.FeedURL),
|
|
|
+ slog.Bool("entry_is_new", entryIsNew),
|
|
|
+ slog.Bool("force_refresh", forceRefresh),
|
|
|
+ slog.String("website_url", websiteURL),
|
|
|
)
|
|
|
|
|
|
startTime := time.Now()
|
|
|
@@ -90,7 +95,6 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|
|
if scraperErr != nil {
|
|
|
slog.Warn("Unable to scrape entry",
|
|
|
slog.Int64("user_id", user.ID),
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("entry_url", entry.URL),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
slog.String("feed_url", feed.FeedURL),
|
|
|
@@ -134,7 +138,6 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
|
|
|
|
|
|
if compiledBlocklist.MatchString(entry.URL) || compiledBlocklist.MatchString(entry.Title) || compiledBlocklist.MatchString(entry.Author) || containsBlockedTag {
|
|
|
slog.Debug("Blocking entry based on rule",
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("entry_url", entry.URL),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
slog.String("feed_url", feed.FeedURL),
|
|
|
@@ -165,7 +168,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
|
|
|
|
|
|
if compiledKeeplist.MatchString(entry.URL) || compiledKeeplist.MatchString(entry.Title) || compiledKeeplist.MatchString(entry.Author) || containsAllowedTag {
|
|
|
slog.Debug("Allow entry based on rule",
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("entry_url", entry.URL),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
slog.String("feed_url", feed.FeedURL),
|
|
|
@@ -230,7 +232,6 @@ func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
|
|
|
re := regexp.MustCompile(parts[1])
|
|
|
url = re.ReplaceAllString(entry.URL, parts[2])
|
|
|
slog.Debug("Rewriting entry URL",
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("original_entry_url", entry.URL),
|
|
|
slog.String("rewritten_entry_url", url),
|
|
|
slog.Int64("feed_id", feed.ID),
|
|
|
@@ -238,7 +239,6 @@ func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
|
|
|
)
|
|
|
} else {
|
|
|
slog.Debug("Cannot find search and replace terms for replace rule",
|
|
|
- slog.Int64("entry_id", entry.ID),
|
|
|
slog.String("original_entry_url", entry.URL),
|
|
|
slog.String("rewritten_entry_url", url),
|
|
|
slog.Int64("feed_id", feed.ID),
|