slang.tcl 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. #
  2. # slang.tcl - June 24 2010
  3. # by horgh
  4. #
  5. # Requires Tcl 8.5+ and tcllib
  6. #
  7. # Made with heavy inspiration from perpleXa's urbandict script!
  8. #
  9. # Must .chanset #channel +ud
  10. #
  11. # Uses is.gd to shorten long definition URL if isgd.tcl package present
  12. #
  13. package require htmlparse
  14. package require http
  15. package require tls
  16. ::http::register https 443 [list ::tls::socket -ssl2 0 -ssl3 0 -tls1 1]
  17. namespace eval ::ud {
  18. # set this to !ud or whatever you want
  19. variable trigger "slang"
  20. # maximum lines to output
  21. variable max_lines 1
  22. # approximate characters per line
  23. variable line_length 400
  24. # show truncated message / url if more than one line
  25. variable show_truncate 1
  26. # toggle whether we store raw response data.
  27. # this will store the response from an http request to urbandictionary.com
  28. # in files for debugging.
  29. # NOTE: enabling this will cause a file to be created for every request
  30. # the script makes, so these can pile up quickly!
  31. variable store_responses 0
  32. # the directory to store responses if store_responses is on.
  33. # this is under your eggdrop directory.
  34. # files under this directory will be named with unix timestamps
  35. # (microseconds).
  36. variable store_responses_dir slang_responses
  37. variable output_cmd "putserv"
  38. variable client "Mozilla/5.0 (compatible; Y!J; for robot study; keyoshid)"
  39. variable url https://www.urbandictionary.com/define.php
  40. variable url_random https://www.urbandictionary.com/random.php
  41. # regex to find the word
  42. variable word_regex {<a class="word" href=.*?>(.*?)</a>}
  43. variable list_regex {<div class="def-panel *" data-defid="[0-9]+?">.*?<div class="def-footer">}
  44. variable def_regex {<div class="def-panel *" data-defid="([0-9]+?)">.*?<div class="meaning">(.*?)</div>.*?<div class="example">(.*?)</div>}
  45. setudef flag ud
  46. bind pub -|- $::ud::trigger ::ud::handler
  47. # 0 if isgd package is present
  48. variable isgd_disabled [catch {package require isgd}]
  49. }
  50. # write a console log message.
  51. proc ::ud::log {msg} {
  52. if {[string length $msg] == 0} {
  53. return
  54. }
  55. putlog "slang.tcl $msg"
  56. }
  57. proc ::ud::handler {nick uhost hand chan argv} {
  58. if {![channel get $chan ud]} { return }
  59. set argv [string trim $argv]
  60. set argv [split $argv]
  61. if {[string is digit [lindex $argv 0]]} {
  62. set number [lindex $argv 0]
  63. set query [join [lrange $argv 1 end]]
  64. } else {
  65. set query [join $argv]
  66. set number 1
  67. }
  68. set query [string trim $query]
  69. if {[llength $argv] == 1 && [string is digit [lindex $argv 0]]} {
  70. $::ud::output_cmd "PRIVMSG $chan :Usage: $::ud::trigger \[#\] <query> (or just $::ud::trigger for random definition)"
  71. return
  72. }
  73. if {$query == ""} {
  74. ::ud::log "Performing random query..."
  75. if {[catch {::ud::get_random} result]} {
  76. $::ud::output_cmd "PRIVMSG $chan :Error: $result"
  77. return
  78. }
  79. ::ud::output $chan $result
  80. } else {
  81. ::ud::log "Fetching definition $number of $query..."
  82. if {[catch {::ud::get_def $query $number} result]} {
  83. $::ud::output_cmd "PRIVMSG $chan :Error: $result"
  84. return
  85. }
  86. ::ud::output $chan $result
  87. }
  88. }
  89. proc ::ud::output {chan def_dict} {
  90. set output 0
  91. foreach line [::ud::split_line $::ud::line_length [dict get $def_dict definition]] {
  92. if {[incr output] > $::ud::max_lines} {
  93. if {$::ud::show_truncate} {
  94. $::ud::output_cmd "PRIVMSG $chan :Output truncated. [::ud::def_url $def_dict]"
  95. }
  96. break
  97. }
  98. $::ud::output_cmd "PRIVMSG $chan :$line"
  99. }
  100. }
  101. proc ::ud::get_random {} {
  102. set result [::ud::http_fetch $::ud::url_random -1]
  103. set word [dict get $result word]
  104. set defs_html [dict get $result definitions]
  105. if {[llength $defs_html] < 1} {
  106. error "Failure finding random definition."
  107. }
  108. return [::ud::parse $word [lindex $defs_html 0]]
  109. }
  110. proc ::ud::get_def {query number} {
  111. set page [expr {int(ceil($number / 7.0))}]
  112. set number [expr {$number - (($page - 1) * 7)}]
  113. set url $::ud::url
  114. append url ?
  115. if {$page == 1} {
  116. append url [::http::formatQuery term $query]
  117. } else {
  118. append url [::http::formatQuery term $query page $page]
  119. }
  120. set result [::ud::http_fetch $url $page]
  121. set word [dict get $result word]
  122. set defs_html [dict get $result definitions]
  123. if {[llength $defs_html] < $number} {
  124. error "[llength $defs_html] definitions found."
  125. }
  126. return [::ud::parse $word [lindex $defs_html [expr {$number - 1}]]]
  127. }
  128. # store an http request response (if enabled).
  129. proc ::ud::store_response {data} {
  130. if {!$::ud::store_responses} {
  131. return
  132. }
  133. # ensure the directory to store the responses exists.
  134. if {![file isdirectory $::ud::store_responses_dir]} {
  135. # mkdir raises an error if it fails.
  136. file mkdir $::ud::store_responses_dir
  137. }
  138. # make the filename that we will store to.
  139. set base [clock microseconds]
  140. set path [file join $::ud::store_responses_dir $base]
  141. # write out the response
  142. set f [open $path w]
  143. puts -nonewline $f $data
  144. close $f
  145. ::ud::log "stored response to $path"
  146. }
  147. proc ::ud::http_fetch {url page} {
  148. http::config -useragent $::ud::client
  149. ::ud::log "Fetching $url"
  150. set token [http::geturl $url -timeout 20000]
  151. set data [http::data $token]
  152. set ncode [http::ncode $token]
  153. set meta [http::meta $token]
  154. http::cleanup $token
  155. # Follow redirects
  156. if {[regexp -- {30[01237]} $ncode]} {
  157. set new_url [::ud::get_location_header $meta]
  158. # We lose our page parameter apparently.
  159. if {$page != -1 && $page != 1} {
  160. append new_url &page=$page
  161. }
  162. return [::ud::http_fetch $new_url $page]
  163. }
  164. if {$ncode == 404} {
  165. error "No definitions found."
  166. }
  167. if {$ncode != 200} {
  168. error "HTTP fetch error. Code: $ncode"
  169. }
  170. # we may be storing responses for debugging.
  171. if {[catch {::ud::store_response $data} result]} {
  172. putlog "Problem storing response: $result"
  173. }
  174. return [::ud::parse_word_and_definitions $data]
  175. }
  176. proc ::ud::get_location_header {meta} {
  177. dict for {k v} $meta {
  178. set k [string tolower $k]
  179. if {$k == "location"} {
  180. return $v
  181. }
  182. }
  183. error "Location header not found"
  184. }
  185. # parse a response from a file.
  186. # this is primarily for debugging purposes. we can pass this function
  187. # a stored response file to try to parse it.
  188. proc ::ud::parse_response_file {path} {
  189. set f [open $path]
  190. set data [read -nonewline $f]
  191. close $f
  192. return [::ud::parse_word_and_definitions $data]
  193. }
  194. # first pass parsing - we pull out the word and the definitions from
  195. # the page.
  196. # we return a dictionary with keys 'word' and 'definitions' on success.
  197. # on failure, we raise an error.
  198. proc ::ud::parse_word_and_definitions {data} {
  199. # pull out the word.
  200. if {![regexp -- $::ud::word_regex $data -> word]} {
  201. error "Word not found. No definitions or parsing problem!"
  202. }
  203. set word [string trim $word]
  204. set definitions [regexp -all -inline -- $::ud::list_regex $data]
  205. set definition_count [llength $definitions]
  206. if {$definition_count == 0} {
  207. error "No definitions found"
  208. }
  209. return [list word $word definitions $definitions]
  210. }
  211. proc ::ud::parse {word raw_definition} {
  212. if {![regexp $::ud::def_regex $raw_definition -> number definition]} {
  213. error "Could not parse definition's HTML"
  214. }
  215. set definition [htmlparse::mapEscapes $definition]
  216. set definition [regsub -all -- {<.*?>} $definition ""]
  217. set definition [regsub -all -- {\n+} $definition " "]
  218. set definition [string tolower $definition]
  219. set definition [string trim $definition]
  220. return [list number $number word $word definition "$word is $definition"]
  221. }
  222. proc ::ud::def_url {def_dict} {
  223. set word [dict get $def_dict word]
  224. set number [dict get $def_dict number]
  225. set raw_url ${::ud::url}?[http::formatQuery term $word defid $number]
  226. if {$::ud::isgd_disabled} {
  227. return $raw_url
  228. } else {
  229. if {[catch {isgd::shorten $raw_url} shortened]} {
  230. return "$raw_url (is.gd error)"
  231. } else {
  232. return $shortened
  233. }
  234. }
  235. }
  236. # by fedex
  237. proc ::ud::split_line {max str} {
  238. set last [expr {[string length $str] -1}]
  239. set start 0
  240. set end [expr {$max -1}]
  241. set lines []
  242. while {$start <= $last} {
  243. if {$last >= $end} {
  244. set end [string last { } $str $end]
  245. }
  246. lappend lines [string trim [string range $str $start $end]]
  247. set start $end
  248. set end [expr {$start + $max}]
  249. }
  250. return $lines
  251. }
  252. putlog "slang.tcl loaded"