readability_test.go 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package readability // import "miniflux.app/v2/internal/reader/readability"
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func TestBaseURL(t *testing.T) {
  9. html := `
  10. <html>
  11. <head>
  12. <base href="https://example.org/ ">
  13. </head>
  14. <body>
  15. <article>
  16. Some content
  17. </article>
  18. </body>
  19. </html>`
  20. baseURL, _, err := ExtractContent(strings.NewReader(html))
  21. if err != nil {
  22. t.Fatal(err)
  23. }
  24. if baseURL != "https://example.org/" {
  25. t.Errorf(`Unexpected base URL, got %q instead of "https://example.org/"`, baseURL)
  26. }
  27. }
  28. func TestMultipleBaseURL(t *testing.T) {
  29. html := `
  30. <html>
  31. <head>
  32. <base href="https://example.org/ ">
  33. <base href="https://example.com/ ">
  34. </head>
  35. <body>
  36. <article>
  37. Some content
  38. </article>
  39. </body>
  40. </html>`
  41. baseURL, _, err := ExtractContent(strings.NewReader(html))
  42. if err != nil {
  43. t.Fatal(err)
  44. }
  45. if baseURL != "https://example.org/" {
  46. t.Errorf(`Unexpected base URL, got %q instead of "https://example.org/"`, baseURL)
  47. }
  48. }
  49. func TestRelativeBaseURL(t *testing.T) {
  50. html := `
  51. <html>
  52. <head>
  53. <base href="/test/ ">
  54. </head>
  55. <body>
  56. <article>
  57. Some content
  58. </article>
  59. </body>
  60. </html>`
  61. baseURL, _, err := ExtractContent(strings.NewReader(html))
  62. if err != nil {
  63. t.Fatal(err)
  64. }
  65. if baseURL != "" {
  66. t.Errorf(`Unexpected base URL, got %q`, baseURL)
  67. }
  68. }
  69. func TestWithoutBaseURL(t *testing.T) {
  70. html := `
  71. <html>
  72. <head>
  73. <title>Test</title>
  74. </head>
  75. <body>
  76. <article>
  77. Some content
  78. </article>
  79. </body>
  80. </html>`
  81. baseURL, _, err := ExtractContent(strings.NewReader(html))
  82. if err != nil {
  83. t.Fatal(err)
  84. }
  85. if baseURL != "" {
  86. t.Errorf(`Unexpected base URL, got %q instead of ""`, baseURL)
  87. }
  88. }