Просмотр исходного кода

Easier full-text search possibility (#4505)

* Easier full-text search possibility
Contributes to https://github.com/FreshRSS/FreshRSS/issues/1331
Avoid concats in searches to make text indexes easier to build

* Fix tests

* Documentation
Alexandre Alapetite 3 лет назад
Родитель
Сommit
4f111c5b30

+ 11 - 13
app/Models/EntryDAO.php

@@ -10,10 +10,6 @@ class FreshRSS_EntryDAO extends Minz_ModelPdo implements FreshRSS_Searchable {
 		return true;
 	}
 
-	protected static function sqlConcat($s1, $s2) {
-		return 'CONCAT(' . $s1 . ',' . $s2 . ')';	//MySQL
-	}
-
 	public static function sqlHexDecode(string $x): string {
 		return 'unhex(' . $x . ')';
 	}
@@ -950,47 +946,49 @@ SQL;
 			}
 			if ($filter->getInurl()) {
 				foreach ($filter->getInurl() as $url) {
-					$sub_search .= 'AND ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ? ';
+					$sub_search .= 'AND ' . $alias . 'link LIKE ? ';
 					$values[] = "%{$url}%";
 				}
 			}
 
 			if ($filter->getNotAuthor()) {
 				foreach ($filter->getNotAuthor() as $author) {
-					$sub_search .= 'AND (NOT ' . $alias . 'author LIKE ?) ';
+					$sub_search .= 'AND ' . $alias . 'author NOT LIKE ? ';
 					$values[] = "%{$author}%";
 				}
 			}
 			if ($filter->getNotIntitle()) {
 				foreach ($filter->getNotIntitle() as $title) {
-					$sub_search .= 'AND (NOT ' . $alias . 'title LIKE ?) ';
+					$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? ';
 					$values[] = "%{$title}%";
 				}
 			}
 			if ($filter->getNotTags()) {
 				foreach ($filter->getNotTags() as $tag) {
-					$sub_search .= 'AND (NOT ' . $alias . 'tags LIKE ?) ';
+					$sub_search .= 'AND ' . $alias . 'tags NOT LIKE ? ';
 					$values[] = "%{$tag}%";
 				}
 			}
 			if ($filter->getNotInurl()) {
 				foreach ($filter->getNotInurl() as $url) {
-					$sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'link', $alias . 'guid') . ' LIKE ?) ';
+					$sub_search .= 'AND ' . $alias . 'link NOT LIKE ? ';
 					$values[] = "%{$url}%";
 				}
 			}
 
 			if ($filter->getSearch()) {
 				foreach ($filter->getSearch() as $search_value) {
-					$sub_search .= 'AND ' . static::sqlConcat($alias . 'title',
-						static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ? ';
+					$sub_search .= 'AND (' . $alias . 'title LIKE ? OR ' .
+						(static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
+					$values[] = "%{$search_value}%";
 					$values[] = "%{$search_value}%";
 				}
 			}
 			if ($filter->getNotSearch()) {
 				foreach ($filter->getNotSearch() as $search_value) {
-					$sub_search .= 'AND (NOT ' . static::sqlConcat($alias . 'title',
-						static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' LIKE ?) ';
+					$sub_search .= 'AND ' . $alias . 'title NOT LIKE ? AND ' .
+						(static::isCompressed() ? 'UNCOMPRESS(' . $alias . 'content_bin)' : '' . $alias . 'content') . ' NOT LIKE ? ';
+					$values[] = "%{$search_value}%";
 					$values[] = "%{$search_value}%";
 				}
 			}

+ 0 - 4
app/Models/EntryDAOSQLite.php

@@ -10,10 +10,6 @@ class FreshRSS_EntryDAOSQLite extends FreshRSS_EntryDAO {
 		return false;
 	}
 
-	protected static function sqlConcat($s1, $s2) {
-		return $s1 . '||' . $s2;
-	}
-
 	public static function sqlHexDecode(string $x): string {
 		return $x;
 	}

+ 1 - 0
docs/en/admins/01_Index.md

@@ -18,6 +18,7 @@ Learn how to install, update, and backup FreshRSS, as well as how to use the com
 * [Setting Up Automatic Feed Updating](08_FeedUpdates.md)
 * [Access Control](09_AccessControl.md)
 * [Apache/Nginx configuration files](10_ServerConfig.md)
+* [Database configuration](DatabaseConfig.md)
 * [Using the command line interface (CLI)](https://github.com/FreshRSS/FreshRSS/tree/edge/cli)
 * [Configuring the email address validation](05_Configuring_email_validation.md)
 * [Frequently asked questions](04_Frequently_Asked_Questions.md)

+ 33 - 0
docs/en/admins/DatabaseConfig.md

@@ -0,0 +1,33 @@
+# Database configuration
+
+FreshRSS supports the databases SQLite (built-in), PostgreSQL, MySQL / MariaDB.
+
+While the default installation should be fine for most cases, additional tuning can be made.
+
+## Full-text search optimisation in PostgreSQL
+
+Without changing anything in FreshRSS’ code (which is using [`ILIKE`](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE)), it is possible to make text searches much faster by adding some indexes in PostgreSQL 9.1+ (at the cost of more disc space and slower insertions):
+
+```sql
+CREATE EXTENSION pg_trgm;
+CREATE INDEX gin_trgm_index_title ON freshrss_entry USING gin(title gin_trgm_ops);
+CREATE INDEX gin_trgm_index_content ON freshrss_entry USING gin(content gin_trgm_ops);
+```
+
+Where `freshrss_entry` needs to be adapted to the name of the *entry* of a given use, e.g., `freshrss_alice_entry`.
+
+Such an index on the `entry.title` column makes searches such as `intitle:Hello` much faster.
+General searches such as `Something` search both on `entry.title` and `entry.content` and therefore require the two indexes shown above.
+
+Likewise, if you wanted to speed up searches on the authors (`author:Alice`), you would add another index:
+
+```sql
+CREATE INDEX gin_trgm_index_author ON freshrss_entry USING gin(author gin_trgm_ops);
+```
+
+Etc. for other text fields. The list of fields can be seen in [`CREATE TABLE _entry` section](https://github.com/FreshRSS/FreshRSS/blob/edge/app/SQL/install.sql.pgsql.php).
+
+### References
+
+* [GIN: Generalized Inverted Index](https://www.postgresql.org/docs/current/gin-intro.html)
+* [`pg_trgm` module for fast text search](https://www.postgresql.org/docs/current/pgtrgm.html#id-1.11.7.42.8)

+ 4 - 0
docs/en/developers/03_Backend/01_Database_schema.md

@@ -1,3 +1,7 @@
 # Database Schema
 
 > **TODO**
+
+## See also
+
+* [Database configuration](../../admins/DatabaseConfig.md)

+ 5 - 5
tests/app/Models/SearchTest.php

@@ -318,17 +318,17 @@ class SearchTest extends PHPUnit\Framework\TestCase {
 			],
 			[
 				'#tag Hello OR (author:Alice inurl:example) OR (f:3 intitle:World) OR L:12',
-				' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) OR ((e.author LIKE ? AND e.link||e.guid LIKE ? )) OR' .
+				' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) OR ((e.author LIKE ? AND e.link LIKE ? )) OR' .
 					' ((e.id_feed IN (?) AND e.title LIKE ? )) OR ((e.id IN (SELECT et.id_entry FROM `_entrytag` et WHERE et.id_tag IN (?)) )) ',
-				['%tag%','%Hello%','%Alice%','%example%','3','%World%', '12']
+				['%tag%','%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', '12']
 			],
 			[
 				'#tag Hello (author:Alice inurl:example) (f:3 intitle:World) label:Bleu',
-				' ((e.tags LIKE ? AND e.title||e.content LIKE ? )) AND' .
-					' ((e.author LIKE ? AND e.link||e.guid LIKE ? )) AND' .
+				' ((e.tags LIKE ? AND (e.title LIKE ? OR e.content LIKE ?) )) AND' .
+					' ((e.author LIKE ? AND e.link LIKE ? )) AND' .
 					' ((e.id_feed IN (?) AND e.title LIKE ? )) AND' .
 					' ((e.id IN (SELECT et.id_entry FROM `_entrytag` et, `_tag` t WHERE et.id_tag = t.id AND t.name IN (?)) )) ',
-				['%tag%','%Hello%','%Alice%','%example%','3','%World%', 'Bleu']
+				['%tag%', '%Hello%', '%Hello%', '%Alice%', '%example%', '3', '%World%', 'Bleu']
 			],
 			[
 				'!((author:Alice intitle:hello) OR (author:Bob intitle:world))',