Adapt to py3
This commit is contained in:
parent
342248b05a
commit
39d5f5762c
1 changed file with 91 additions and 46 deletions
|
@ -83,14 +83,14 @@
|
|||
import weechat
|
||||
w = weechat
|
||||
import re
|
||||
import htmllib
|
||||
import html.parser
|
||||
from time import time as now
|
||||
from fnmatch import fnmatch
|
||||
from urllib import quote
|
||||
import urllib.request
|
||||
|
||||
SCRIPT_NAME = "announce_url_title"
|
||||
SCRIPT_AUTHOR = "xt <xt@bash.no>"
|
||||
SCRIPT_VERSION = "18"
|
||||
SCRIPT_VERSION = "19"
|
||||
SCRIPT_LICENSE = "GPL3"
|
||||
SCRIPT_DESC = "Announce URL titles to channel or locally"
|
||||
|
||||
|
@ -121,13 +121,71 @@ buffer_name = ''
|
|||
|
||||
urls = {}
|
||||
script_nick = 'url'
|
||||
|
||||
|
||||
def error_callback(*_, **__):
    """No-op sink: swallow any HTMLParser error report, whatever its arguments."""
    return None
|
||||
|
||||
def is_string(data):
    """Return True when *data* is a text (str) object, False otherwise."""
    return isinstance(data, str)
|
||||
|
||||
def is_bytes(data):
    """Return True when *data* is a binary (bytes) object, False otherwise."""
    return isinstance(data, bytes)
|
||||
|
||||
def to_ascii(data):
    """Coerce *data* to an ASCII-only str, dropping non-ASCII characters.

    Accepts str, bytes, or anything else (which is first converted with
    str()).  Always returns str: the original returned *bytes* for str
    input, which would make the only caller — HTMLParser.feed(), which
    requires str — raise TypeError.
    """
    if isinstance(data, bytes):
        # Drop any byte outside the ASCII range.
        return data.decode('ascii', errors='ignore')
    if not isinstance(data, str):
        data = str(data)
    # Round-trip through ASCII to strip non-ASCII code points but keep str.
    return data.encode('ascii', errors='ignore').decode('ascii')
|
||||
|
||||
|
||||
class Parser(html.parser.HTMLParser):
    """Fetch *url* and extract the text of its <title> element.

    After construction, ``self.title`` is the title text (str) or None
    when the page could not be fetched or contains no <title>.
    """

    def __init__(self, url):
        self.title = None
        self.rec = False  # True while the parser is inside <title>...</title>
        html.parser.HTMLParser.__init__(self)
        # Install the no-op error handler BEFORE feeding: the original
        # assigned it after feed(), too late to suppress parse errors.
        self.error = error_callback
        try:
            self.feed(to_ascii(urllib.request.urlopen(url).read()))
        except (urllib.error.HTTPError, urllib.error.URLError, ValueError):
            # Unreachable host, HTTP error, or malformed URL: leave
            # self.title as None and let the caller report it.
            return

    def handle_starttag(self, tag, attrs):
        # Start recording character data once <title> opens.
        if tag == 'title':
            self.rec = True

    def handle_data(self, data):
        if self.rec:
            self.title = data

    def handle_endtag(self, tag):
        # Stop recording when </title> closes.
        if tag == 'title':
            self.rec = False
|
||||
|
||||
|
||||
def get_title(url):
    """Return the <title> text of *url*, or a placeholder when absent.

    Parses the page exactly once; the original constructed Parser(url)
    twice, fetching the URL over the network a second time.
    """
    title = Parser(url).title
    if title:
        return title
    return "URL doesn’t have a title"
|
||||
|
||||
|
||||
def say(s, buffer=''):
    """Print *s* as a normal message, prefixed with the script nick."""
    line = '%s\t%s' % (script_nick, s)
    weechat.prnt(buffer, line)
|
||||
|
||||
def unescape(s):
    """Unescape HTML entities, e.g. '&amp;amp;' -> '&amp;'.

    The original used save_bgn()/save_end(), which were py2 htmllib
    APIs that html.parser.HTMLParser never had (and HTMLParser takes no
    positional argument), so it crashed on py3.  html.unescape() is the
    supported replacement.
    """
    return html.unescape(s)
|
||||
|
@ -170,7 +228,7 @@ def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message
|
|||
ignorelist = w.config_get_plugin('url_ignore').split(',')
|
||||
for url in urlRe.findall(message):
|
||||
|
||||
url = quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
|
||||
url = urllib.parse.quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
|
||||
ignore = False
|
||||
for ignore_part in ignorelist:
|
||||
if ignore_part.strip():
|
||||
|
@ -199,59 +257,46 @@ def url_process_launcher():
|
|||
if not url_d: # empty dict means not launched
|
||||
url_d['launched'] = now()
|
||||
|
||||
# Read 8192
|
||||
python2_bin = w.info_get("python2_bin", "") or "python"
|
||||
cmd = python2_bin + " -c \"import urllib2; opener = urllib2.build_opener();"
|
||||
cmd += "opener.addheaders = [('User-agent','%s')];" % user_agent
|
||||
cmd += "print opener.open('%s').read(8192)\"" % url
|
||||
title = get_title(url)
|
||||
|
||||
url_d['stdout'] = ''
|
||||
url_d['url_hook_process'] = w.hook_process(cmd, 30 * 1000, "url_process_cb", "")
|
||||
url_d['url_hook_process'] = w.hook_process(title, 30 * 1000, "title_process_cb", "")
|
||||
|
||||
return w.WEECHAT_RC_OK
|
||||
|
||||
def title_process_cb(data, title, rc, stdout, stderr):
    """Hook-process callback: announce *title* to the configured buffers.

    Removes the dead py2 remnants left in the diff (references to the
    undefined `command` and `urls[url]['stdout']`) and keeps only the
    py3 path, which receives the title directly.
    """
    global buffer_name, urls

    if int(rc) >= 0:
        # Truncate overly long titles to the configured maximum.
        max_len = int(w.config_get_plugin('title_max_length'))
        if len(title) > max_len:
            title = "%s [...]" % title[0:max_len]

        splits = buffer_name.split('.')  # FIXME bad code
        server = splits[0]
        buffer = '.'.join(splits[1:])
        output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')
        announce_public = w.config_get_plugin('announce_public')
        if announce_public == 'on':
            found = False
            for active_buffer in w.config_get_plugin('buffers').split(','):
                if active_buffer.lower() == buffer_name.lower():
                    w.command('', '/msg -server %s %s %s' %(server, buffer, output))
                    found = True
            for active_buffer in w.config_get_plugin('buffers_notice').split(','):
                if active_buffer.lower() == buffer_name.lower():
                    w.command('', '/notice -server %s %s %s' %(server, buffer, output))
                    found = True
            if not found:
                # Not a public-announce buffer: fall back to local print.
                say(output, w.buffer_search('', buffer_name))
        else:
            say(output, w.buffer_search('', buffer_name))

    return w.WEECHAT_RC_OK
|
||||
|
||||
|
||||
def purge_cb(*args):
|
||||
''' Purge the url list on configured intervals '''
|
||||
|
||||
|
@ -299,7 +344,7 @@ if __name__ == "__main__":
|
|||
SCRIPT_DESC, "", ""):
|
||||
|
||||
# Set default settings
|
||||
for option, default_value in settings.iteritems():
|
||||
for option, default_value in settings.items():
|
||||
if not w.config_is_set_plugin(option):
|
||||
w.config_set_plugin(option, default_value)
|
||||
ignore_buffers = Ignores('ignore_buffers')
|
||||
|
|
Loading…
Reference in a new issue