1
0

wiki.tcl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. #
  2. # edited Jun 27 2017 for https by genewitch
  3. # ramok on freenode/#tcl knew the fix
  4. #
  5. # Mar 30 2010
  6. # by horgh
  7. #
  8. # Requires Tcl 8.5+ and tcllib
  9. #
  10. # Wikipedia.org fetcher
  11. #
  12. # To enable you must .chanset #channel +wiki
  13. #
  14. # Tests: Whole number (list of possible interpretations)
  15. #
  16. package require http
  17. package require htmlparse
  18. package require tls
  19. ::http::register https 443 ::tls::socket
  20. namespace eval wiki {
  21. variable max_lines 1
  22. variable max_chars 400
  23. variable output_cmd "putserv"
  24. variable url "https://en.wikipedia.org/wiki/"
  25. bind pub -|- "!w" wiki::search
  26. bind pub -|- "!wiki" wiki::search
  27. # variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
  28. variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))}
  29. setudef flag wiki
  30. }
  31. proc wiki::fetch {term {url {}}} {
  32. if {$url != ""} {
  33. set token [http::geturl $url -timeout 10000]
  34. } else {
  35. set query [http::formatQuery [regsub -all -- {\s} $term "_"]]
  36. set token [http::geturl ${wiki::url}${query} -timeout 10000]
  37. }
  38. set data [http::data $token]
  39. set ncode [http::ncode $token]
  40. set meta [http::meta $token]
  41. upvar #0 $token state
  42. set fetched_url $state(url)
  43. http::cleanup $token
  44. # debug
  45. putlog "Fetch! term: $term url: $url fetched: $fetched_url"
  46. set fid [open "w-debug.txt" w]
  47. puts $fid $data
  48. close $fid
  49. # Follow redirects
  50. if {[regexp -- {^3\d{2}$} $ncode]} {
  51. return [wiki::fetch $term [dict get $meta Location]]
  52. }
  53. if {$ncode != 200} {
  54. error "HTTP query failed ($ncode): $data: $meta"
  55. }
  56. # If page returns list of results, choose the first one and fetch that
  57. #if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} {
  58. # regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
  59. # return [wiki::fetch $new_query]
  60. #}
  61. if {![regexp -- $wiki::parse_regexp $data -> out]} {
  62. error "Parse error"
  63. }
  64. return [list url $fetched_url result [wiki::sanitise $out]]
  65. }
  66. proc wiki::sanitise {raw} {
  67. set raw [::htmlparse::mapEscapes $raw]
  68. # Remove some help links
  69. set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]
  70. set raw [regsub -all -- {<(.*?)>} $raw ""]
  71. set raw [regsub -all -- {\[.*?\]} $raw ""]
  72. set raw [regsub -all -- {\n} $raw " "]
  73. return $raw
  74. }
  75. proc wiki::search {nick uhost hand chan argv} {
  76. if {![channel get $chan wiki]} { return }
  77. if {[string length $argv] == 0} {
  78. $wiki::output_cmd "PRIVMSG $chan :Please provide a term."
  79. return
  80. }
  81. set argv [string trim $argv]
  82. # Upper case first character
  83. set argv [string toupper [string index $argv 0]][string range $argv 1 end]
  84. if {[catch {wiki::fetch $argv} data]} {
  85. $wiki::output_cmd "PRIVMSG $chan :Error: $data"
  86. return
  87. }
  88. foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
  89. if {[incr count] > $wiki::max_lines} {
  90. $wiki::output_cmd "PRIVMSG $chan :Output truncated. [dict get $data url]"
  91. break
  92. }
  93. $wiki::output_cmd "PRIVMSG $chan :$line"
  94. }
  95. }
  96. # by fedex
  97. proc wiki::split_line {max str} {
  98. set last [expr {[string length $str] -1}]
  99. set start 0
  100. set end [expr {$max -1}]
  101. set lines []
  102. while {$start <= $last} {
  103. if {$last >= $end} {
  104. set end [string last { } $str $end]
  105. }
  106. lappend lines [string trim [string range $str $start $end]]
  107. set start $end
  108. set end [expr {$start + $max}]
  109. }
  110. return $lines
  111. }
  112. putlog "wiki.tcl loaded"