Selaa lähdekoodia

feat(entry): keep only metadata for removed entries

This should significantly shrink the space taken by Miniflux's database:

```sql
miniflux=#
SELECT
  relname, pg_size_pretty(pg_total_relation_size(relname::regclass))
FROM
  pg_catalog.pg_statio_user_tables
ORDER BY
  pg_total_relation_size(relname::regclass)
DESC;

       relname        | pg_size_pretty
----------------------+----------------
 entries              | 158 MB
 icons                | 3312 kB
 enclosures           | 1568 kB
 sessions             | 1048 kB
 feeds                | 288 kB
 feed_icons           | 72 kB
 users                | 64 kB
 user_sessions        | 64 kB
 categories           | 48 kB
 integrations         | 32 kB
 api_keys             | 32 kB
 webauthn_credentials | 24 kB
 schema_version       | 16 kB
 acme_cache           | 16 kB
(14 rows)

miniflux=#
```

This should close #3524
jvoisin 7 kuukautta sitten
vanhempi
commit
5c26e06780
2 muutettua tiedostoa jossa 49 lisäystä ja 3 poistoa
  1. 7 0
      internal/cli/cleanup_tasks.go
  2. 42 3
      internal/storage/entry.go

+ 7 - 0
internal/cli/cleanup_tasks.go

@@ -46,4 +46,11 @@ func runCleanupTasks(store *storage.Storage) {
 			metric.ArchiveEntriesDuration.WithLabelValues(model.EntryStatusUnread).Observe(time.Since(startTime).Seconds())
 		}
 	}
+
+	if rowsAffected, err := store.DeleteContentRemovedEntries(); err != nil {
+		slog.Error("Unable to delete the content of removed entries", slog.Any("error", err))
+	} else {
+		slog.Info("Deleting content of removed entries completed",
+			slog.Int64("removed_entries_content_removed", rowsAffected))
+	}
 }

+ 42 - 3
internal/storage/entry.go

@@ -258,8 +258,8 @@ func (s *Storage) GetReadTime(feedID int64, entryHash string) int {
 	return result
 }
 
-// cleanupEntries deletes from the database entries marked as "removed" and not visible anymore in the feed.
-func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
+// deleteRemovedNonexistentEntries deletes from the database entries marked as "removed" and not visible anymore in the feed.
+func (s *Storage) deleteRemovedNonexistentEntries(feedID int64, entryHashes []string) error {
 	query := `
 		DELETE FROM
 			entries
@@ -275,6 +275,45 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
 	return nil
 }
 
+// DeleteContentRemovedEntries deletes the enclosures of entries marked as
+// "removed" and blanks their title, content, url and author, so that only the
+// remaining metadata columns are kept.
+//
+// It returns the number of entries whose content was cleared by this call.
+// Entries that are already blank are skipped: PostgreSQL writes a new row
+// version for every row matched by an UPDATE, even when no value changes, so
+// rewriting them on every cleanup run would only generate dead tuples.
+func (s *Storage) DeleteContentRemovedEntries() (int64, error) {
+	query := `
+		DELETE FROM
+			enclosures
+		WHERE
+		 	enclosures.entry_id IN
+				(SELECT id FROM entries WHERE status=$1)
+	`
+	if _, err := s.db.Exec(query, model.EntryStatusRemoved); err != nil {
+		return 0, fmt.Errorf(`store: unable to delete enclosures from removed entries: %v`, err)
+	}
+
+	// IS DISTINCT FROM is used instead of <> so that a NULL title or url is
+	// still treated as "not yet blanked" and gets normalised to ''.
+	query = `
+		UPDATE
+			entries
+		SET
+			title='',
+			content=NULL,
+			url='',
+			author=NULL
+		WHERE
+			status=$1
+			AND (
+				title IS DISTINCT FROM ''
+				OR content IS NOT NULL
+				OR url IS DISTINCT FROM ''
+				OR author IS NOT NULL
+			)
+	`
+
+	result, err := s.db.Exec(query, model.EntryStatusRemoved)
+	if err != nil {
+		return 0, fmt.Errorf(`store: unable to delete the content of removed entries: %v`, err)
+	}
+
+	count, err := result.RowsAffected()
+	if err != nil {
+		return 0, fmt.Errorf(`store: unable to get the number of rows affected while deleting content from removed entries: %v`, err)
+	}
+
+	return count, nil
+}
+
 // RefreshFeedEntries updates feed entries while refreshing a feed.
 func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) {
 	entryHashes := make([]string, 0, len(entries))
@@ -322,7 +361,7 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
 	}
 
 	go func() {
-		if err := s.cleanupEntries(feedID, entryHashes); err != nil {
+		if err := s.deleteRemovedNonexistentEntries(feedID, entryHashes); err != nil {
 			slog.Error("Unable to cleanup entries",
 				slog.Int64("user_id", userID),
 				slog.Int64("feed_id", feedID),