Adapt to py3

This commit is contained in:
Alarig Le Lay 2020-07-13 01:05:17 +02:00
parent 342248b05a
commit 39d5f5762c
Signed by: alarig
GPG key ID: 7AFE62C6DF8BCDEC

View file

@ -83,14 +83,14 @@
import weechat
w = weechat
import re
import htmllib
import html.parser
from time import time as now
from fnmatch import fnmatch
from urllib import quote
import urllib.request
SCRIPT_NAME = "announce_url_title"
SCRIPT_AUTHOR = "xt <xt@bash.no>"
SCRIPT_VERSION = "18"
SCRIPT_VERSION = "19"
SCRIPT_LICENSE = "GPL3"
SCRIPT_DESC = "Announce URL titles to channel or locally"
@ -121,13 +121,71 @@ buffer_name = ''
# Shared URL bookkeeping; starts empty.
# NOTE(review): appears to be keyed by URL string (urls[url][...]) - confirm against the callers.
urls = {}
# Text that say() places before the tab in every line it prints.
script_nick = 'url'
def error_callback(*_, **__):
    """Accept any positional or keyword arguments and do nothing.

    Meant to be plugged in wherever a callable is required but its
    notifications should be discarded. Always returns None.
    """
    return None
def is_string(data):
    """Tell whether *data* is a text string (a ``str`` instance)."""
    text_type = str
    return isinstance(data, text_type)
def is_bytes(data):
    """Tell whether *data* is a ``bytes`` instance."""
    binary_type = bytes
    return isinstance(data, binary_type)
def to_ascii(data):
    """Drop everything outside the ASCII range, flipping text <-> bytes.

    The result type depends on the input type:

    * ``bytes`` is decoded and returned as an ASCII-only ``str``.
    * ``str`` is encoded and returned as ASCII ``bytes``.
    * any other object is converted with ``str()`` first and then
      returned as ASCII ``bytes``.

    Characters (or bytes) that are not ASCII are silently discarded.
    """
    if isinstance(data, bytes):
        return data.decode('ascii', errors='ignore')
    # Everything that is not bytes ends up encoded; only non-str objects
    # need the extra str() conversion first.
    text = data if isinstance(data, str) else str(data)
    return text.encode('ascii', errors='ignore')
class Parser(html.parser.HTMLParser):
    """Download a page and extract the text of its first ``<title>``.

    Both the download and the parse happen inside the constructor.
    Afterwards ``title`` holds the title text, or ``None`` when the URL
    could not be fetched or the document has no title text.

    Args:
        url: Address to fetch; passed to ``urllib.request.urlopen``.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot block the caller indefinitely.
    """

    # Only the beginning of the document is downloaded: a link to a huge
    # file must not be pulled into memory completely.
    MAX_BYTES = 65536

    def __init__(self, url, timeout=10):
        html.parser.HTMLParser.__init__(self)
        self.title = None
        self.rec = False
        try:
            # The context manager closes the connection even when the
            # read fails half way.
            with urllib.request.urlopen(url, timeout=timeout) as response:
                charset = response.headers.get_content_charset() or 'utf-8'
                raw = response.read(self.MAX_BYTES)
        except (OSError, ValueError):
            # OSError covers URLError/HTTPError as well as socket
            # timeouts; ValueError is raised for malformed URLs.
            return
        try:
            text = raw.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            text = raw.decode('utf-8', errors='replace')
        self.feed(text)
        self.rec = False

    def error(self, *_, **__):
        """Discard parse-error reports instead of raising.

        Defined as a method so the hook is already in place while
        ``feed()`` runs.
        """

    def handle_starttag(self, tag, attrs):
        """Start recording text when the first ``<title>`` opens."""
        # Later <title> elements (inline SVG, for instance) must not
        # replace the page title that was already captured.
        if tag == 'title' and self.title is None:
            self.rec = True

    def handle_data(self, data):
        """Append text found inside the title element to ``title``."""
        if self.rec:
            # The parser may deliver the title in several pieces.
            self.title = (self.title or '') + data

    def handle_endtag(self, tag):
        """Stop recording once the title element closes."""
        if tag == 'title':
            self.rec = False
def get_title(url):
    """Return the title of the page at *url*, or a placeholder text.

    Args:
        url: Address handed to ``Parser``.

    Returns:
        The title extracted by ``Parser``, or the string
        "URL doesnt have a title" when none was found.
    """
    # Constructing a Parser downloads the page, so build it only once
    # instead of once for the check and again for the return value.
    title = Parser(url).title
    if title:
        return title
    return "URL doesnt have a title"
def say(s, buffer=''):
    """Print *s* through ``weechat.prnt``, prefixed with the script nick.

    The line is formatted as ``<script_nick><TAB><s>``. *buffer* is passed
    to ``weechat.prnt`` unchanged and defaults to the empty string.
    """
    line = '%s\t%s' % (script_nick, s)
    weechat.prnt(buffer, line)
def unescape(s):
    """Unescape HTML entities

    Converts named and numeric character references in *s* (``&amp;``,
    ``&#233;``, ...) to the characters they stand for and returns the
    resulting string.
    """
    # html.parser.HTMLParser has no save_bgn()/save_end() and rejects a
    # positional constructor argument, so the htmllib recipe cannot run on
    # Python 3; the standard library offers html.unescape() for this job.
    return html.unescape(s)
@ -170,7 +228,7 @@ def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message
ignorelist = w.config_get_plugin('url_ignore').split(',')
for url in urlRe.findall(message):
url = quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
url = urllib.parse.quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
ignore = False
for ignore_part in ignorelist:
if ignore_part.strip():
@ -199,59 +257,46 @@ def url_process_launcher():
if not url_d: # empty dict means not launched
url_d['launched'] = now()
# Read 8192
python2_bin = w.info_get("python2_bin", "") or "python"
cmd = python2_bin + " -c \"import urllib2; opener = urllib2.build_opener();"
cmd += "opener.addheaders = [('User-agent','%s')];" % user_agent
cmd += "print opener.open('%s').read(8192)\"" % url
title = get_title(url)
url_d['stdout'] = ''
url_d['url_hook_process'] = w.hook_process(cmd, 30 * 1000, "url_process_cb", "")
url_d['url_hook_process'] = w.hook_process(title, 30 * 1000, "title_process_cb", "")
return w.WEECHAT_RC_OK
def title_process_cb(data, title, rc, stdout, stderr):
    """ Callback announcing a page title

    Referenced by name in the ``w.hook_process`` call of the launcher.
    Only *title* is used; *data*, *rc*, *stdout* and *stderr* are accepted
    because of the callback signature and otherwise ignored.

    The title is flattened to one line, shortened to the configured
    ``title_max_length`` and wrapped in the configured prefix/suffix.
    When ``announce_public`` is ``on`` it is sent with ``/msg`` and/or
    ``/notice`` if the current buffer is listed in ``buffers`` /
    ``buffers_notice``; in every other case it is printed locally with
    say(). Always returns ``w.WEECHAT_RC_OK``.
    """
    # Page titles frequently span several lines; collapse all whitespace
    # so the /msg and /notice commands built below stay on a single line.
    title = re.sub(r"\s+", " ", title).strip()

    max_len = int(w.config_get_plugin('title_max_length'))
    if len(title) > max_len:
        title = "%s [...]" % title[0:max_len]

    splits = buffer_name.split('.') #FIXME bad code
    server = splits[0]
    target = '.'.join(splits[1:])
    output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')

    current = buffer_name.lower()
    found = False
    if w.config_get_plugin('announce_public') == 'on':
        for active_buffer in w.config_get_plugin('buffers').split(','):
            if active_buffer.lower() == current:
                w.command('', '/msg -server %s %s %s' % (server, target, output))
                found = True
        for active_buffer in w.config_get_plugin('buffers_notice').split(','):
            if active_buffer.lower() == current:
                w.command('', '/notice -server %s %s %s' % (server, target, output))
                found = True

    # Nothing was sent publicly (announcing disabled or buffer not
    # listed): show the title only in the local buffer.
    if not found:
        say(output, w.buffer_search('', buffer_name))

    return w.WEECHAT_RC_OK
def purge_cb(*args):
''' Purge the url list on configured intervals '''
@ -299,7 +344,7 @@ if __name__ == "__main__":
SCRIPT_DESC, "", ""):
# Set default settings
for option, default_value in settings.iteritems():
for option, default_value in settings.items():
if not w.config_is_set_plugin(option):
w.config_set_plugin(option, default_value)
ignore_buffers = Ignores('ignore_buffers')