atom_03.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "encoding/base64"
  6. "html"
  7. "strings"
  8. )
  9. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  10. type atom03Feed struct {
  11. Version string `xml:"version,attr"`
  12. // The "atom:id" element's content conveys a permanent, globally unique identifier for the feed.
  13. // It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element,
  14. // but MUST NOT contain more than one. The content of this element, when present, MUST be a URI.
  15. ID string `xml:"http://purl.org/atom/ns# id"`
  16. // The "atom:title" element is a Content construct that conveys a human-readable title for the feed.
  17. // atom:feed elements MUST contain exactly one atom:title element.
  18. // If the feed describes a Web resource, its content SHOULD be the same as that resource's title.
  19. Title atom03Content `xml:"http://purl.org/atom/ns# title"`
  20. // The "atom:link" element is a Link construct that conveys a URI associated with the feed.
  21. // The nature of the relationship as well as the link itself is determined by the element's content.
  22. // atom:feed elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
  23. // atom:feed elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
  24. // atom:feed elements MAY contain additional atom:link elements beyond those described above.
  25. Links atomLinks `xml:"http://purl.org/atom/ns# link"`
  26. // The "atom:author" element is a Person construct that indicates the default author of the feed.
  27. // atom:feed elements MUST contain exactly one atom:author element,
  28. // UNLESS all of the atom:feed element's child atom:entry elements contain an atom:author element.
  29. // atom:feed elements MUST NOT contain more than one atom:author element.
  30. Author AtomPerson `xml:"http://purl.org/atom/ns# author"`
  31. // The "atom:entry" element's represents an individual entry that is contained by the feed.
  32. // atom:feed elements MAY contain one or more atom:entry elements.
  33. Entries []atom03Entry `xml:"http://purl.org/atom/ns# entry"`
  34. }
  35. type atom03Entry struct {
  36. // The "atom:id" element's content conveys a permanent, globally unique identifier for the entry.
  37. // It MUST NOT change over time, even if other representations of the entry (such as a web representation pointed to by the entry's atom:link element) are relocated.
  38. // If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds.
  39. ID string `xml:"id"`
  40. // The "atom:title" element is a Content construct that conveys a human-readable title for the entry.
  41. // atom:entry elements MUST have exactly one "atom:title" element.
  42. // If an entry describes a Web resource, its content SHOULD be the same as that resource's title.
  43. Title atom03Content `xml:"title"`
  44. // The "atom:modified" element is a Date construct that indicates the time that the entry was last modified.
  45. // atom:entry elements MUST contain an atom:modified element, but MUST NOT contain more than one.
  46. // The content of an atom:modified element MUST have a time zone whose value SHOULD be "UTC".
  47. Modified string `xml:"modified"`
  48. // The "atom:issued" element is a Date construct that indicates the time that the entry was issued.
  49. // atom:entry elements MUST contain an atom:issued element, but MUST NOT contain more than one.
  50. // The content of an atom:issued element MAY omit a time zone.
  51. Issued string `xml:"issued"`
  52. // The "atom:created" element is a Date construct that indicates the time that the entry was created.
  53. // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one.
  54. // The content of an atom:created element MUST have a time zone whose value SHOULD be "UTC".
  55. // If atom:created is not present, its content MUST considered to be the same as that of atom:modified.
  56. Created string `xml:"created"`
  57. // The "atom:link" element is a Link construct that conveys a URI associated with the entry.
  58. // The nature of the relationship as well as the link itself is determined by the element's content.
  59. // atom:entry elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
  60. // atom:entry elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
  61. // atom:entry elements MAY contain additional atom:link elements beyond those described above.
  62. Links atomLinks `xml:"link"`
  63. // The "atom:summary" element is a Content construct that conveys a short summary, abstract or excerpt of the entry.
  64. // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one.
  65. Summary atom03Content `xml:"summary"`
  66. // The "atom:content" element is a Content construct that conveys the content of the entry.
  67. // atom:entry elements MAY contain one or more atom:content elements.
  68. Content atom03Content `xml:"content"`
  69. // The "atom:author" element is a Person construct that indicates the default author of the entry.
  70. // atom:entry elements MUST contain exactly one atom:author element,
  71. // UNLESS the atom:feed element containing them contains an atom:author element itself.
  72. // atom:entry elements MUST NOT contain more than one atom:author element.
  73. Author AtomPerson `xml:"author"`
  74. }
  // atom03Content describes an Atom 0.3 Content construct (used here for titles,
  // summaries and entry bodies) for XML decoding. Both the character data and the
  // raw inner XML of the element are captured so that content() can pick the one
  // that matches the declared mode.
  type atom03Content struct {
  	// Content constructs MAY have a "type" attribute, whose value indicates the media type of the content.
  	// When present, this attribute's value MUST be a registered media type [RFC2045].
  	// If not present, its value MUST be considered to be "text/plain".
  	Type string `xml:"type,attr"`

  	// Content constructs MAY have a "mode" attribute, whose value indicates the method used to encode the content.
  	// When present, this attribute's value MUST be listed below.
  	// If not present, its value MUST be considered to be "xml".
  	//
  	// "xml": A mode attribute with the value "xml" indicates that the element's content is inline xml (for example, namespace-qualified XHTML).
  	//
  	// "escaped": A mode attribute with the value "escaped" indicates that the element's content is an escaped string.
  	// Processors MUST unescape the element's content before considering it as content of the indicated media type.
  	//
  	// "base64": A mode attribute with the value "base64" indicates that the element's content is base64-encoded [RFC2045].
  	// Processors MUST decode the element's content before considering it as content of the indicated media type.
  	Mode string `xml:"mode,attr"`

  	// CharData receives the character data of the element.
  	CharData string `xml:",chardata"`

  	// InnerXML receives the raw, unparsed XML nested inside the element.
  	InnerXML string `xml:",innerxml"`
  }

  // content returns the textual payload of the construct, selected according to Mode:
  //
  //   - "xml": the raw inner XML;
  //   - "base64": the character data decoded with the standard base64 alphabet,
  //     or an empty string when the payload cannot be decoded;
  //   - "escaped", a missing mode or any other value: the character data as is.
  //
  // Unless Type is exactly "text/html", the selected payload is HTML-escaped.
  // Leading and trailing whitespace is always trimmed from the result.
  func (a *atom03Content) content() string {
  	var content string

  	switch a.Mode {
  	case "xml":
  		content = a.InnerXML
  	case "base64":
  		// Payloads embedded in XML are frequently indented or wrapped over several
  		// lines. The decoder only tolerates CR/LF, so remove every whitespace
  		// character first; otherwise such payloads are silently dropped.
  		encoded := strings.Join(strings.Fields(a.CharData), "")

  		// Best effort: a payload that still fails to decode yields empty content.
  		if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
  			content = string(decoded)
  		}
  	default:
  		// Covers the "escaped" mode as well as a missing or unknown mode.
  		content = a.CharData
  	}

  	if a.Type != "text/html" {
  		content = html.EscapeString(content)
  	}

  	return strings.TrimSpace(content)
  }
  114. }