Locator.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  37. * @author Ryan Parman
  38. * @author Geoffrey Sneddon
  39. * @author Ryan McCue
  40. * @link http://simplepie.org/ SimplePie
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. */
  43. /**
  44. * Used for feed auto-discovery
  45. *
  46. *
  47. * This class can be overloaded with {@see SimplePie::set_locator_class()}
  48. *
  49. * @package SimplePie
  50. */
  class SimplePie_Locator
  {
      /** User-Agent value forwarded to every File created while probing candidate URLs. */
      var $useragent;

      /** Timeout value forwarded to every File created while probing candidate URLs. */
      var $timeout;

      /** The SimplePie_File whose body is searched for feed links. */
      var $file;

      /** Absolutized hrefs collected by get_links() that share the authority of $file->url (or have none). */
      var $local = array();

      /** Absolutized hrefs collected by get_links() that point at a different authority. */
      var $elsewhere = array();

      /** Not referenced by any method of this class; kept so existing callers/subclasses keep working. */
      var $cached_entities = array();

      /** URL of the inspected file; used to resolve links that appear before any <base> element. */
      var $http_base;

      /** Base URL used to resolve links: the first usable <base href>, or $http_base if there is none. */
      var $base;

      /** Source line of the <base> element that set $base (0 when unknown or absent). */
      var $base_location = 0;

      /** Number of candidate URLs fetched so far across all discovery strategies. */
      var $checked_feeds = 0;

      /** Upper bound for $checked_feeds; strategies stop fetching once it is reached. */
      var $max_checked_feeds = 10;

      /**
       * Parsed HTML of $file->body, or null when the DOM extension is unavailable.
       * Declared explicitly so assigning it is not a dynamic property creation.
       */
      var $dom;

      /** Registry used to create File / Content_Type_Sniffer objects and to call Misc helpers. */
      protected $registry;

      /**
       * Remember the file and fetch settings, and parse the file body as HTML.
       *
       * Parse warnings are silenced. An empty body is not handed to
       * DOMDocument::loadHTML() (which rejects empty input on PHP 8); the locator
       * then simply works on an empty document.
       *
       * @param SimplePie_File $file File to search for feeds
       * @param int $timeout Timeout passed to every File created by the locator
       * @param string|null $useragent User-Agent passed to every File created by the locator
       * @param int $max_checked_feeds Maximum number of candidate URLs to fetch
       */
      public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
      {
          $this->file = $file;
          $this->useragent = $useragent;
          $this->timeout = $timeout;
          $this->max_checked_feeds = $max_checked_feeds;
          $this->dom = null;

          if (!class_exists('DOMDocument'))
          {
              return;
          }

          $this->dom = new DOMDocument();
          $body = $this->file->body;
          if ($body === null || $body === false || $body === '')
          {
              // Nothing to parse; keep the empty document.
              return;
          }

          set_error_handler(array('SimplePie_Misc', 'silence_errors'));
          try
          {
              $this->dom->loadHTML($body);
          }
          catch (Exception $e)
          {
              // Treat an unparseable body as a document without elements.
              $this->dom = new DOMDocument();
          }
          catch (Throwable $e)
          {
              // PHP 7+ engine errors (e.g. ValueError); never matches on PHP 5.
              $this->dom = new DOMDocument();
          }
          // Reached on every path above, so the silencing handler never leaks.
          restore_error_handler();
      }

      /**
       * Inject the registry used for object creation and Misc helper calls.
       *
       * @param SimplePie_Registry $registry
       */
      public function set_registry(SimplePie_Registry $registry)
      {
          $this->registry = $registry;
      }

      /**
       * Locate a feed for the file given to the constructor.
       *
       * Returns the file itself when it already is a feed. Otherwise the enabled
       * strategies run in this order: autodiscovery, local extension, local body,
       * remote extension, remote body. The first strategy that yields a result wins.
       *
       * @param int $type Bitmask of SIMPLEPIE_LOCATOR_* constants selecting the strategies
       * @param array|null $working Receives, by reference, the result list of the winning strategy
       * @return SimplePie_File|null The first feed found, or null
       * @throws SimplePie_Exception When a strategy needs the DOM and it is unavailable
       */
      public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
      {
          if ($this->is_feed($this->file))
          {
              return $this->file;
          }

          if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
          {
              // Only HTML documents are worth scanning for links.
              $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
              if ($sniffer->get_type() !== 'text/html')
              {
                  return null;
              }
          }

          if ($type & ~SIMPLEPIE_LOCATOR_NONE)
          {
              $this->get_base();
          }

          if (($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery()))
          {
              return $working[0];
          }

          $link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
          if (($type & $link_strategies) && $this->get_links())
          {
              if (($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local)))
              {
                  return $working[0];
              }

              if (($type & SIMPLEPIE_LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local)))
              {
                  return $working[0];
              }

              if (($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere)))
              {
                  return $working[0];
              }

              if (($type & SIMPLEPIE_LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere)))
              {
                  return $working[0];
              }
          }

          return null;
      }

      /**
       * Decide whether a file should be treated as a feed.
       *
       * Remote files are judged by their sniffed content type; local files are
       * always accepted; anything else is rejected.
       *
       * @param SimplePie_File $file File to examine
       * @param bool $check_html When true, a sniffed type of text/html is accepted as well
       * @return bool
       */
      public function is_feed($file, $check_html = false)
      {
          if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
          {
              $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
              $sniffed = $sniffer->get_type();
              $mime_types = array('application/rss+xml', 'application/rdf+xml',
                                  'text/rdf', 'application/atom+xml', 'text/xml',
                                  'application/xml', 'application/x-rss+xml');
              if ($check_html)
              {
                  $mime_types[] = 'text/html';
              }

              return in_array($sniffed, $mime_types);
          }

          return (bool) ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL);
      }

      /**
       * Work out the base URLs used to resolve relative links.
       *
       * $http_base becomes the file URL. $base becomes the first <base href> that
       * can be absolutized against it (falling back to $http_base), and
       * $base_location records that element's line number when available.
       *
       * @throws SimplePie_Exception When the DOM is unavailable
       */
      public function get_base()
      {
          $this->ensure_dom();

          $this->http_base = $this->file->url;
          $this->base = $this->http_base;

          foreach ($this->dom->getElementsByTagName('base') as $element)
          {
              if (!$element->hasAttribute('href'))
              {
                  continue;
              }

              $base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
              if ($base === false)
              {
                  continue;
              }

              $this->base = $base;
              $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
              break;
          }
      }

      /**
       * Collect feeds advertised through rel attributes on <link>, <a> and <area>.
       *
       * @return array|null List of File objects for the feeds found, or null when there are none
       * @throws SimplePie_Exception When the DOM is unavailable
       */
      public function autodiscovery()
      {
          $done = array();
          $feeds = array();
          foreach (array('link', 'a', 'area') as $tag)
          {
              $feeds = array_merge($feeds, $this->search_elements_by_tag($tag, $done, $feeds));
          }

          if (empty($feeds))
          {
              return null;
          }

          return array_values($feeds);
      }

      /**
       * Probe every element of one tag name that carries both href and rel.
       *
       * A link qualifies when its rel contains "feed", or contains "alternate"
       * (without "stylesheet") together with a type of text/html,
       * application/rss+xml or application/atom+xml. Each href is fetched at most
       * once: links already listed in $done or present in $feeds are skipped.
       *
       * @param string $name Tag name to scan
       * @param array $done Hrefs already processed; extended by reference
       * @param array $feeds Feeds found so far, keyed by href
       * @return array $feeds plus any newly confirmed feeds, keyed by href
       * @throws SimplePie_Exception When the DOM is unavailable
       */
      protected function search_elements_by_tag($name, &$done, $feeds)
      {
          $this->ensure_dom();

          foreach ($this->dom->getElementsByTagName($name) as $link)
          {
              if ($this->checked_feeds === $this->max_checked_feeds)
              {
                  break;
              }

              if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
              {
                  continue;
              }

              $rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel')))));
              $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

              // A <base> element only affects links that come after it.
              $resolve_against = ($this->base_location < $line) ? $this->base : $this->http_base;
              $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $resolve_against));
              if ($href === false)
              {
                  continue;
              }

              $already_handled = in_array($href, $done) || isset($feeds[$href]);
              if (!$already_handled
                  && (in_array('feed', $rel)
                      || (in_array('alternate', $rel)
                          && !in_array('stylesheet', $rel)
                          && $link->hasAttribute('type')
                          && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('text/html', 'application/rss+xml', 'application/atom+xml')))))
              {
                  $this->checked_feeds++;
                  $feed = $this->fetch_candidate($href);
                  if ($this->is_usable_response($feed) && $this->is_feed($feed, true))
                  {
                      $feeds[$href] = $feed;
                  }
              }

              $done[] = $href;
          }

          return $feeds;
      }

      /**
       * Collect the hrefs of all <a> elements into $local and $elsewhere.
       *
       * Only hrefs without a scheme or with an http, https or feed scheme are kept.
       *
       * @return true|null True when at least one link was collected, otherwise null
       * @throws SimplePie_Exception When the DOM is unavailable
       */
      public function get_links()
      {
          $this->ensure_dom();

          // Loop-invariant: the authority every link is compared against.
          $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));

          foreach ($this->dom->getElementsByTagName('a') as $link)
          {
              if (!$link->hasAttribute('href'))
              {
                  continue;
              }

              $raw = trim($link->getAttribute('href'));
              $parsed = $this->registry->call('Misc', 'parse_url', array($raw));
              if (!($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme'])))
              {
                  continue;
              }

              if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
              {
                  $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $this->base));
              }
              else
              {
                  $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $this->http_base));
              }

              if ($href === false)
              {
                  continue;
              }

              if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
              {
                  $this->local[] = $href;
              }
              else
              {
                  $this->elsewhere[] = $href;
              }
          }

          $this->local = array_unique($this->local);
          $this->elsewhere = array_unique($this->elsewhere);

          if (!empty($this->local) || !empty($this->elsewhere))
          {
              return true;
          }

          return null;
      }

      /**
       * Return the absolutized href of the first <a> or <link> whose rel contains $rel.
       *
       * Only hrefs without a scheme or with an http/https scheme are considered.
       * A link whose href cannot be absolutized is skipped rather than ending the search.
       *
       * @param string $rel Relation to look for, compared against the lower-cased rel tokens
       * @return string|null The absolutized href, or null when no link matches
       * @throws SimplePie_Exception When the DOM or DOMXpath is unavailable
       */
      public function get_rel_link($rel)
      {
          $this->ensure_dom();

          if (!class_exists('DOMXpath'))
          {
              throw new SimplePie_Exception('DOMXpath not found, unable to use '.
                                            'get_rel_link');
          }

          $xpath = new DOMXpath($this->dom);
          $query = '//a[@rel and @href] | //link[@rel and @href]';
          foreach ($xpath->query($query) as $link)
          {
              $raw = trim($link->getAttribute('href'));
              $parsed = $this->registry->call('Misc', 'parse_url', array($raw));
              if (!($parsed['scheme'] === '' || preg_match('/^https?$/i', $parsed['scheme'])))
              {
                  continue;
              }

              if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
              {
                  $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $this->base));
              }
              else
              {
                  $href = $this->registry->call('Misc', 'absolutize_url', array($raw, $this->http_base));
              }

              if ($href === false)
              {
                  // One malformed href must not hide valid links further down the page.
                  continue;
              }

              $rel_values = explode(' ', strtolower($link->getAttribute('rel')));
              if (in_array($rel, $rel_values))
              {
                  return $href;
              }
          }

          return null;
      }

      /**
       * Fetch the URLs that end in .rss, .rdf, .atom or .xml and return the first feed.
       *
       * URLs that were fetched but turned out not to be feeds are removed from $array.
       *
       * @param array $array Candidate URLs; pruned by reference
       * @return array|null Single-element array holding the feed's File, or null
       */
      public function extension(&$array)
      {
          foreach ($array as $key => $value)
          {
              if ($this->checked_feeds === $this->max_checked_feeds)
              {
                  break;
              }

              if (!in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
              {
                  continue;
              }

              $this->checked_feeds++;
              $feed = $this->fetch_candidate($value);
              if ($this->is_usable_response($feed) && $this->is_feed($feed))
              {
                  return array($feed);
              }

              unset($array[$key]);
          }

          return null;
      }

      /**
       * Fetch the URLs that contain "rss", "rdf", "atom" or "xml" anywhere and return the first feed.
       *
       * URLs that were fetched but turned out not to be feeds are removed from $array.
       *
       * @param array $array Candidate URLs; pruned by reference
       * @return array|null Single-element array holding the feed's File, or null
       */
      public function body(&$array)
      {
          foreach ($array as $key => $value)
          {
              if ($this->checked_feeds === $this->max_checked_feeds)
              {
                  break;
              }

              if (!preg_match('/(rss|rdf|atom|xml)/i', $value))
              {
                  continue;
              }

              $this->checked_feeds++;
              // Sends the same Accept header as the other strategies.
              $feed = $this->fetch_candidate($value);
              if ($this->is_usable_response($feed) && $this->is_feed($feed))
              {
                  return array($feed);
              }

              unset($array[$key]);
          }

          return null;
      }

      /**
       * Throw unless the parsed document is available.
       *
       * @throws SimplePie_Exception When $dom is null
       */
      protected function ensure_dom()
      {
          if ($this->dom === null)
          {
              throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
          }
      }

      /**
       * Create a File for a candidate URL with an Accept header that prefers feed types.
       *
       * @param string $url Absolute URL to fetch
       * @return SimplePie_File Whatever the registry creates for 'File'
       */
      protected function fetch_candidate($url)
      {
          $headers = array(
              'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
          );

          return $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent));
      }

      /**
       * Check that a fetched candidate succeeded.
       *
       * Non-remote files only need the success flag. Remote files additionally
       * need status 200, or a status above 206 and below 300.
       *
       * @param SimplePie_File $feed Fetched candidate
       * @return bool
       */
      protected function is_usable_response($feed)
      {
          if (!$feed->success)
          {
              return false;
          }

          // Parenthesised on purpose: "===" binds tighter than "&".
          if (($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0)
          {
              return true;
          }

          return $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
      }
  }