url_rewrite_test.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
  4. import (
  5. "testing"
  6. "miniflux.app/v2/internal/model"
  7. )
  8. func TestRewriteEntryURL(t *testing.T) {
  9. scenarios := []struct {
  10. name string
  11. feed *model.Feed
  12. entry *model.Entry
  13. expectedURL string
  14. description string
  15. }{
  16. {
  17. name: "NoRewriteRules",
  18. feed: &model.Feed{
  19. ID: 1,
  20. FeedURL: "https://example.com/feed.xml",
  21. UrlRewriteRules: "",
  22. },
  23. entry: &model.Entry{
  24. URL: "https://example.com/article/123",
  25. },
  26. expectedURL: "https://example.com/article/123",
  27. description: "Should return original URL when no rewrite rules are specified",
  28. },
  29. {
  30. name: "EmptyRewriteRules",
  31. feed: &model.Feed{
  32. ID: 1,
  33. FeedURL: "https://example.com/feed.xml",
  34. UrlRewriteRules: " ",
  35. },
  36. entry: &model.Entry{
  37. URL: "https://example.com/article/123",
  38. },
  39. expectedURL: "https://example.com/article/123",
  40. description: "Should return original URL when rewrite rules are empty/whitespace",
  41. },
  42. {
  43. name: "ValidRewriteRule",
  44. feed: &model.Feed{
  45. ID: 1,
  46. FeedURL: "https://example.com/feed.xml",
  47. UrlRewriteRules: `rewrite("^https://example.com/article/(.+)"|"https://example.com/full-article/$1")`,
  48. },
  49. entry: &model.Entry{
  50. URL: "https://example.com/article/123",
  51. },
  52. expectedURL: "https://example.com/full-article/123",
  53. description: "Should rewrite URL according to the regex pattern",
  54. },
  55. {
  56. name: "ComplexRegexRewrite",
  57. feed: &model.Feed{
  58. ID: 1,
  59. FeedURL: "https://news.ycombinator.com/rss",
  60. UrlRewriteRules: `rewrite("^https://news\.ycombinator\.com/item\?id=(.+)"|"https://hn.algolia.com/api/v1/items/$1")`,
  61. },
  62. entry: &model.Entry{
  63. URL: "https://news.ycombinator.com/item?id=12345",
  64. },
  65. expectedURL: "https://hn.algolia.com/api/v1/items/12345",
  66. description: "Should handle complex regex patterns with escaped characters",
  67. },
  68. {
  69. name: "NoMatchingPattern",
  70. feed: &model.Feed{
  71. ID: 1,
  72. FeedURL: "https://example.com/feed.xml",
  73. UrlRewriteRules: `rewrite("^https://different.com/(.+)"|"https://rewritten.com/$1")`,
  74. },
  75. entry: &model.Entry{
  76. URL: "https://example.com/article/123",
  77. },
  78. expectedURL: "https://example.com/article/123",
  79. description: "Should return original URL when regex pattern doesn't match",
  80. },
  81. {
  82. name: "InvalidRegexPattern",
  83. feed: &model.Feed{
  84. ID: 1,
  85. FeedURL: "https://example.com/feed.xml",
  86. UrlRewriteRules: `rewrite("^https://example.com/[invalid"|"https://rewritten.com/$1")`,
  87. },
  88. entry: &model.Entry{
  89. URL: "https://example.com/article/123",
  90. },
  91. expectedURL: "https://example.com/article/123",
  92. description: "Should return original URL when regex pattern is invalid",
  93. },
  94. {
  95. name: "MalformedRewriteRule",
  96. feed: &model.Feed{
  97. ID: 1,
  98. FeedURL: "https://example.com/feed.xml",
  99. UrlRewriteRules: `rewrite("invalid format")`,
  100. },
  101. entry: &model.Entry{
  102. URL: "https://example.com/article/123",
  103. },
  104. expectedURL: "https://example.com/article/123",
  105. description: "Should return original URL when rewrite rule format is malformed",
  106. },
  107. {
  108. name: "MultipleGroups",
  109. feed: &model.Feed{
  110. ID: 1,
  111. FeedURL: "https://example.com/feed.xml",
  112. UrlRewriteRules: `rewrite("^https://example.com/([^/]+)/article/(.+)"|"https://example.com/full/$1/story/$2")`,
  113. },
  114. entry: &model.Entry{
  115. URL: "https://example.com/tech/article/ai-news",
  116. },
  117. expectedURL: "https://example.com/full/tech/story/ai-news",
  118. description: "Should handle multiple capture groups in regex",
  119. },
  120. {
  121. name: "URLWithSpecialCharacters",
  122. feed: &model.Feed{
  123. ID: 1,
  124. FeedURL: "https://example.com/feed.xml",
  125. UrlRewriteRules: `rewrite("^https://example.com/(.+)"|"https://proxy.example.com/$1")`,
  126. },
  127. entry: &model.Entry{
  128. URL: "https://example.com/article/test?param=value&other=123#section",
  129. },
  130. expectedURL: "https://proxy.example.com/article/test?param=value&other=123#section",
  131. description: "Should handle URLs with query parameters and fragments",
  132. },
  133. {
  134. name: "ReplaceWithStaticURL",
  135. feed: &model.Feed{
  136. ID: 1,
  137. FeedURL: "https://example.com/feed.xml",
  138. UrlRewriteRules: `rewrite("^https://example.com/(.+)"|"https://static.example.com/reader")`,
  139. },
  140. entry: &model.Entry{
  141. URL: "https://example.com/article/123",
  142. },
  143. expectedURL: "https://static.example.com/reader",
  144. description: "Should replace with static URL when no capture groups are used in replacement",
  145. },
  146. {
  147. name: "EmptyReplacementString",
  148. feed: &model.Feed{
  149. ID: 1,
  150. FeedURL: "https://example.com/feed.xml",
  151. UrlRewriteRules: `rewrite("^https://example.com/(.+)"|"x")`,
  152. },
  153. entry: &model.Entry{
  154. URL: "https://example.com/article/123",
  155. },
  156. expectedURL: "x",
  157. description: "Should replace with specified string",
  158. },
  159. {
  160. name: "EmptyReplacementNotSupported",
  161. feed: &model.Feed{
  162. ID: 1,
  163. FeedURL: "https://example.com/feed.xml",
  164. UrlRewriteRules: `rewrite("^https://example.com/(.+)"|"")`,
  165. },
  166. entry: &model.Entry{
  167. URL: "https://example.com/article/123",
  168. },
  169. expectedURL: "https://example.com/article/123",
  170. description: "Should return original URL when replacement is empty string (not supported by regex pattern)",
  171. },
  172. {
  173. name: "InvalidRewriteRuleFormat",
  174. feed: &model.Feed{
  175. ID: 1,
  176. FeedURL: "https://example.com/feed.xml",
  177. UrlRewriteRules: `not-a-rewrite-rule`,
  178. },
  179. entry: &model.Entry{
  180. URL: "https://example.com/article/123",
  181. },
  182. expectedURL: "https://example.com/article/123",
  183. description: "Should return original URL when rewrite rule doesn't match expected format",
  184. },
  185. }
  186. for _, scenario := range scenarios {
  187. t.Run(scenario.name, func(t *testing.T) {
  188. result := RewriteEntryURL(scenario.feed, scenario.entry)
  189. if result != scenario.expectedURL {
  190. t.Errorf("Expected URL %q, got %q. Description: %s", scenario.expectedURL, result, scenario.description)
  191. }
  192. })
  193. }
  194. }
  195. func TestRewriteEntryURLWithNilValues(t *testing.T) {
  196. t.Run("NilFeed", func(t *testing.T) {
  197. entry := &model.Entry{URL: "https://example.com/article/123"}
  198. // This should panic or handle gracefully - let's see what happens
  199. defer func() {
  200. if r := recover(); r == nil {
  201. t.Error("Expected panic when feed is nil, but function completed normally")
  202. }
  203. }()
  204. RewriteEntryURL(nil, entry)
  205. })
  206. t.Run("NilEntry", func(t *testing.T) {
  207. feed := &model.Feed{
  208. ID: 1,
  209. FeedURL: "https://example.com/feed.xml",
  210. UrlRewriteRules: `rewrite("^https://example.com/(.+)"|"https://rewritten.com/$1")`,
  211. }
  212. // This should panic or handle gracefully - let's see what happens
  213. defer func() {
  214. if r := recover(); r == nil {
  215. t.Error("Expected panic when entry is nil, but function completed normally")
  216. }
  217. }()
  218. RewriteEntryURL(feed, nil)
  219. })
  220. }
  221. func TestCustomReplaceRuleRegex(t *testing.T) {
  222. scenarios := []struct {
  223. name string
  224. input string
  225. expected []string
  226. matches bool
  227. }{
  228. {
  229. name: "ValidRule",
  230. input: `rewrite("^https://example.com/(.+)"|"https://rewritten.com/$1")`,
  231. expected: []string{`rewrite("^https://example.com/(.+)"|"https://rewritten.com/$1")`, `^https://example.com/(.+)`, `https://rewritten.com/$1`},
  232. matches: true,
  233. },
  234. {
  235. name: "ValidRuleWithEscapedCharacters",
  236. input: `rewrite("^https://news\\.ycombinator\\.com/item\\?id=(.+)"|"https://hn.algolia.com/api/v1/items/$1")`,
  237. expected: []string{`rewrite("^https://news\\.ycombinator\\.com/item\\?id=(.+)"|"https://hn.algolia.com/api/v1/items/$1")`, `^https://news\\.ycombinator\\.com/item\\?id=(.+)`, `https://hn.algolia.com/api/v1/items/$1`},
  238. matches: true,
  239. },
  240. {
  241. name: "InvalidFormat",
  242. input: `rewrite("invalid")`,
  243. expected: nil,
  244. matches: false,
  245. },
  246. {
  247. name: "EmptyString",
  248. input: ``,
  249. expected: nil,
  250. matches: false,
  251. },
  252. {
  253. name: "RandomText",
  254. input: `some random text`,
  255. expected: nil,
  256. matches: false,
  257. },
  258. }
  259. for _, scenario := range scenarios {
  260. t.Run(scenario.name, func(t *testing.T) {
  261. parts := customReplaceRuleRegex.FindStringSubmatch(scenario.input)
  262. if scenario.matches {
  263. if len(parts) < 3 {
  264. t.Errorf("Expected regex to match and return at least 3 parts, got %d parts: %v", len(parts), parts)
  265. return
  266. }
  267. // Check the full match and captured groups
  268. if parts[0] != scenario.expected[0] {
  269. t.Errorf("Expected full match %q, got %q", scenario.expected[0], parts[0])
  270. }
  271. if parts[1] != scenario.expected[1] {
  272. t.Errorf("Expected first capture group %q, got %q", scenario.expected[1], parts[1])
  273. }
  274. if parts[2] != scenario.expected[2] {
  275. t.Errorf("Expected second capture group %q, got %q", scenario.expected[2], parts[2])
  276. }
  277. } else if len(parts) >= 3 {
  278. t.Errorf("Expected regex not to match, but got %d parts: %v", len(parts), parts)
  279. }
  280. })
  281. }
  282. }