Adapt to py3

This commit is contained in:
Alarig Le Lay 2020-07-13 01:05:17 +02:00
parent 342248b05a
commit 39d5f5762c
Signed by: alarig
GPG Key ID: 7AFE62C6DF8BCDEC
1 changed file with 91 additions and 46 deletions

View File

@ -83,14 +83,14 @@
import weechat import weechat
w = weechat w = weechat
import re import re
import htmllib import html.parser
from time import time as now from time import time as now
from fnmatch import fnmatch from fnmatch import fnmatch
from urllib import quote import urllib.request
SCRIPT_NAME = "announce_url_title" SCRIPT_NAME = "announce_url_title"
SCRIPT_AUTHOR = "xt <xt@bash.no>" SCRIPT_AUTHOR = "xt <xt@bash.no>"
SCRIPT_VERSION = "18" SCRIPT_VERSION = "19"
SCRIPT_LICENSE = "GPL3" SCRIPT_LICENSE = "GPL3"
SCRIPT_DESC = "Announce URL titles to channel or locally" SCRIPT_DESC = "Announce URL titles to channel or locally"
@ -121,13 +121,71 @@ buffer_name = ''
urls = {} urls = {}
script_nick = 'url' script_nick = 'url'
def error_callback(*_, **__):
    """Accept any positional/keyword arguments and do nothing."""
    return None
def is_string(data):
    """Tell whether *data* is a text string (``str`` or a subclass)."""
    text_like = isinstance(data, str)
    return text_like
def is_bytes(data):
    """Tell whether *data* is a ``bytes`` object (or a subclass)."""
    binary_like = isinstance(data, bytes)
    return binary_like
def to_ascii(data):
    """Drop every non-ASCII character/byte from *data*.

    Note that the result type is the opposite of the input type:

    * ``str`` input is encoded and returned as ``bytes``;
    * ``bytes`` input is decoded and returned as ``str``;
    * anything else is passed through ``str()`` and returned as ``bytes``.
    """
    if isinstance(data, str):
        return data.encode('ascii', errors='ignore')
    if isinstance(data, bytes):
        return data.decode('ascii', errors='ignore')
    # Neither text nor bytes: fall back to the object's string form.
    return str(data).encode('ascii', errors='ignore')
class Parser(html.parser.HTMLParser):
    """Download *url* and extract the text of its first ``<title>`` element.

    After construction, ``title`` holds the page title with runs of
    whitespace collapsed to single spaces, or ``None`` when the page could
    not be fetched or contains no non-empty title.
    """

    def __init__(self, url):
        """Fetch *url* and parse it immediately.

        Fetch failures (bad URL, HTTP error, network error) are swallowed on
        purpose: the instance is still usable and ``title`` stays ``None``.
        """
        html.parser.HTMLParser.__init__(self)
        self.title = None
        self.rec = False  # True while inside the <title> being recorded
        self._title_parts = []
        try:
            # The context manager guarantees the connection is closed.
            with urllib.request.urlopen(url) as response:
                charset = response.headers.get_content_charset() or 'utf-8'
                body = response.read()
        except (OSError, ValueError):
            # URLError and HTTPError are OSError subclasses; ValueError is
            # raised for strings that are not usable URLs.
            return
        try:
            text = body.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            text = body.decode('utf-8', errors='replace')
        self.feed(text)
        self.rec = False

    def error(self, message):
        """Ignore parse errors; malformed HTML must not abort the lookup."""

    def handle_starttag(self, tag, attrs):
        # Only the first title counts: later ones (for instance inside an
        # inline <svg>) must not replace the page title.
        if tag == 'title' and self.title is None:
            self.rec = True
            self._title_parts = []

    def handle_data(self, data):
        if not self.rec:
            return
        # Title text may arrive in several chunks, so accumulate it and
        # republish the normalised value each time. Collapsing whitespace
        # also keeps raw newlines out of the announced message.
        self._title_parts.append(data)
        collapsed = ' '.join(''.join(self._title_parts).split())
        if collapsed:
            self.title = collapsed

    def handle_endtag(self, tag):
        if tag == 'title':
            self.rec = False
def get_title(url):
    """Return the title of the page at *url*, or a placeholder message.

    The page is fetched and parsed exactly once; the placeholder is returned
    when the parser found no (non-empty) title.
    """
    # Build the parser a single time: constructing it performs the download.
    title = Parser(url).title
    if title:
        return title
    return "URL doesnt have a title"
def say(s, buffer=''):
    """Print *s* to *buffer*, using the script nick as the line prefix."""
    line = '%s\t%s' % (script_nick, s)
    weechat.prnt(buffer, line)
def unescape(s):
    """Unescape HTML entities

    Converts named and numeric character references in *s* (``&amp;``,
    ``&#8211;``, ...) to the characters they stand for and returns the
    resulting string.
    """
    # The Python 2 htmllib save_bgn()/save_end() API has no counterpart on
    # html.parser.HTMLParser; html.unescape is the Python 3 replacement.
    import html
    return html.unescape(s)
@ -170,7 +228,7 @@ def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message
ignorelist = w.config_get_plugin('url_ignore').split(',') ignorelist = w.config_get_plugin('url_ignore').split(',')
for url in urlRe.findall(message): for url in urlRe.findall(message):
url = quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL url = urllib.parse.quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
ignore = False ignore = False
for ignore_part in ignorelist: for ignore_part in ignorelist:
if ignore_part.strip(): if ignore_part.strip():
@ -199,59 +257,46 @@ def url_process_launcher():
if not url_d: # empty dict means not launched if not url_d: # empty dict means not launched
url_d['launched'] = now() url_d['launched'] = now()
# Read 8192 title = get_title(url)
python2_bin = w.info_get("python2_bin", "") or "python"
cmd = python2_bin + " -c \"import urllib2; opener = urllib2.build_opener();"
cmd += "opener.addheaders = [('User-agent','%s')];" % user_agent
cmd += "print opener.open('%s').read(8192)\"" % url
url_d['stdout'] = '' url_d['stdout'] = ''
url_d['url_hook_process'] = w.hook_process(cmd, 30 * 1000, "url_process_cb", "") url_d['url_hook_process'] = w.hook_process(title, 30 * 1000, "title_process_cb", "")
return w.WEECHAT_RC_OK return w.WEECHAT_RC_OK
def title_process_cb(data, title, rc, stdout, stderr):
    """ Callback announcing a page title

    Truncates *title* to the configured ``title_max_length``, wraps it in the
    configured prefix/suffix and then either sends it to the IRC buffer
    (``/msg`` and/or ``/notice``, depending on which buffer lists name the
    current buffer) or prints it locally. Always returns ``WEECHAT_RC_OK``.

    ``data``, ``rc``, ``stdout`` and ``stderr`` are accepted but unused.
    """
    # NOTE(review): this is registered as a hook_process callback; WeeChat
    # may invoke such callbacks more than once per process - confirm that a
    # title cannot be announced twice.

    global buffer_name, urls

    limit = int(w.config_get_plugin('title_max_length'))
    if len(title) > limit:
        title = "%s [...]" % title[0:limit]

    # buffer_name looks like "<server>.<rest>"  #FIXME bad code
    pieces = buffer_name.split('.')
    server = pieces[0]
    buffer = '.'.join(pieces[1:])
    output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')

    announced = False
    if w.config_get_plugin('announce_public') == 'on':
        # Each option holds a comma-separated buffer list; a match sends the
        # title with the associated IRC command.
        routes = (
            ('buffers', '/msg -server %s %s %s'),
            ('buffers_notice', '/notice -server %s %s %s'),
        )
        for option, template in routes:
            for candidate in w.config_get_plugin(option).split(','):
                if candidate.lower() == buffer_name.lower():
                    w.command('', template % (server, buffer, output))
                    announced = True

    if not announced:
        # Public announcing is off, or this buffer is in neither list:
        # show the title locally instead.
        say(output, w.buffer_search('', buffer_name))

    return w.WEECHAT_RC_OK
def purge_cb(*args): def purge_cb(*args):
''' Purge the url list on configured intervals ''' ''' Purge the url list on configured intervals '''
@ -299,7 +344,7 @@ if __name__ == "__main__":
SCRIPT_DESC, "", ""): SCRIPT_DESC, "", ""):
# Set default settings # Set default settings
for option, default_value in settings.iteritems(): for option, default_value in settings.items():
if not w.config_is_set_plugin(option): if not w.config_is_set_plugin(option):
w.config_set_plugin(option, default_value) w.config_set_plugin(option, default_value)
ignore_buffers = Ignores('ignore_buffers') ignore_buffers = Ignores('ignore_buffers')