|
|
@@ -0,0 +1,122 @@
|
|
|
+#
|
|
|
+# Mar 30 2010
|
|
|
+# by horgh
|
|
|
+#
|
|
|
+
|
|
|
+package require http
|
|
|
+package require htmlparse
|
|
|
+
|
|
|
+namespace eval wiki {
|
|
|
+ variable max_lines 1
|
|
|
+ variable output_cmd "putserv"
|
|
|
+ variable url "http://en.wikipedia.org/wiki/"
|
|
|
+
|
|
|
+ bind pub -|- "!w" wiki::search
|
|
|
+ bind pub -|- "!wiki" wiki::search
|
|
|
+
|
|
|
+# variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
|
|
|
+ variable parse_regexp {</table>.*?<p>(.*)</p>.*?<table id="toc"}
|
|
|
+
|
|
|
+ setudef flag wiki
|
|
|
+}
|
|
|
+
|
|
|
+proc wiki::fetch {term {url {}}} {
|
|
|
+ if {$url != ""} {
|
|
|
+ set token [http::geturl $url -timeout 10000]
|
|
|
+ } else {
|
|
|
+ set query [http::formatQuery $term]
|
|
|
+ set token [http::geturl ${wiki::url}${query} -timeout 10000]
|
|
|
+ }
|
|
|
+ set data [http::data $token]
|
|
|
+ set ncode [http::ncode $token]
|
|
|
+ set meta [http::meta $token]
|
|
|
+ upvar #0 $token state
|
|
|
+ set fetched_url $state(url)
|
|
|
+ http::cleanup $token
|
|
|
+
|
|
|
+ # debug
|
|
|
+ putlog "Fetch! term: $term url: $url fetched: $fetched_url"
|
|
|
+ set fid [open "w-debug.txt" w]
|
|
|
+ puts $fid $data
|
|
|
+ close $fid
|
|
|
+
|
|
|
+ # Follow redirects
|
|
|
+ if {[regexp -- {^3\d{2}$} $ncode]} {
|
|
|
+ return [wiki::fetch $term [dict get $meta Location]]
|
|
|
+ }
|
|
|
+
|
|
|
+ if {$ncode != 200} {
|
|
|
+ error "HTTP query failed ($ncode): $data: $meta"
|
|
|
+ }
|
|
|
+
|
|
|
+ # If page returns list of results, choose the first one and fetch that
|
|
|
+ if {[regexp -- {<p>.*?may refer to:</p>} $data]} {
|
|
|
+ regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
|
|
|
+ return [wiki::fetch $new_query]
|
|
|
+ }
|
|
|
+
|
|
|
+ if {![regexp -- $wiki::parse_regexp $data -> out]} {
|
|
|
+ error "Parse error"
|
|
|
+ }
|
|
|
+
|
|
|
+ return [list url $fetched_url result [wiki::sanitise $out]]
|
|
|
+}
|
|
|
+
|
|
|
+proc wiki::sanitise {raw} {
|
|
|
+ set raw [htmlparse::mapEscapes $raw]
|
|
|
+ # Remove pronunciation stuff
|
|
|
+ set raw [regsub -- {<span.*? class="IPA">.*?</span>} $raw ""]
|
|
|
+ # Remove some help links
|
|
|
+ set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]
|
|
|
+
|
|
|
+ set raw [regsub -all -- {<(.*?)>} $raw ""]
|
|
|
+ set raw [regsub -all -- {\[.*?\]} $raw ""]
|
|
|
+ set raw [regsub -all -- {\n} $raw " "]
|
|
|
+ return $raw
|
|
|
+}
|
|
|
+
|
|
|
+proc wiki::search {nick uhost hand chan argv} {
|
|
|
+ if {![channel get $chan wiki]} { return }
|
|
|
+ if {[string length $argv] == 0} {
|
|
|
+ $wiki::output_cmd "PRIVMSG $chan :Please provide a term."
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ set argv [string trim $argv]
|
|
|
+
|
|
|
+ if {[catch {wiki::fetch $argv} data]} {
|
|
|
+ $wiki::output_cmd "PRIVMSG $chan :Error: $data"
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ foreach line [wiki::split_line 400 [dict get $data result]] {
|
|
|
+ if {[incr count] > $wiki::max_lines} {
|
|
|
+ $wiki::output_cmd "PRIVMSG $chan :Output truncuated. [dict get $data url]"
|
|
|
+ break
|
|
|
+ }
|
|
|
+ $wiki::output_cmd "PRIVMSG $chan :$line"
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+# by fedex
|
|
|
+proc wiki::split_line {max str} {
|
|
|
+ set last [expr {[string length $str] -1}]
|
|
|
+ set start 0
|
|
|
+ set end [expr {$max -1}]
|
|
|
+
|
|
|
+ set lines []
|
|
|
+
|
|
|
+ while {$start <= $last} {
|
|
|
+ if {$last >= $end} {
|
|
|
+ set end [string last { } $str $end]
|
|
|
+ }
|
|
|
+
|
|
|
+ lappend lines [string trim [string range $str $start $end]]
|
|
|
+ set start $end
|
|
|
+ set end [expr {$start + $max}]
|
|
|
+ }
|
|
|
+
|
|
|
+ return $lines
|
|
|
+}
|
|
|
+
|
|
|
+putlog "wiki.tcl loaded"
|