Adapt to py3
This commit is contained in:
parent
342248b05a
commit
39d5f5762c
1 changed file with 91 additions and 46 deletions
|
@ -83,14 +83,14 @@
|
||||||
import weechat
|
import weechat
|
||||||
w = weechat
|
w = weechat
|
||||||
import re
|
import re
|
||||||
import htmllib
|
import html.parser
|
||||||
from time import time as now
|
from time import time as now
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
from urllib import quote
|
import urllib.request
|
||||||
|
|
||||||
SCRIPT_NAME = "announce_url_title"
|
SCRIPT_NAME = "announce_url_title"
|
||||||
SCRIPT_AUTHOR = "xt <xt@bash.no>"
|
SCRIPT_AUTHOR = "xt <xt@bash.no>"
|
||||||
SCRIPT_VERSION = "18"
|
SCRIPT_VERSION = "19"
|
||||||
SCRIPT_LICENSE = "GPL3"
|
SCRIPT_LICENSE = "GPL3"
|
||||||
SCRIPT_DESC = "Announce URL titles to channel or locally"
|
SCRIPT_DESC = "Announce URL titles to channel or locally"
|
||||||
|
|
||||||
|
@ -121,13 +121,71 @@ buffer_name = ''
|
||||||
|
|
||||||
urls = {}
|
urls = {}
|
||||||
script_nick = 'url'
|
script_nick = 'url'
|
||||||
|
|
||||||
|
|
||||||
|
def error_callback(*_args, **_kwargs):
    """Accept any arguments and do nothing.

    Used as a no-op replacement for the HTML parser's error handler.
    Always returns None.
    """
    return None
|
||||||
|
|
||||||
|
def is_string(data):
    """Return True when *data* is a ``str`` instance, False otherwise."""
    text_types = (str,)
    return isinstance(data, text_types)
|
||||||
|
|
||||||
|
def is_bytes(data):
    """Return True when *data* is a ``bytes`` instance, False otherwise."""
    binary_types = (bytes,)
    return isinstance(data, binary_types)
|
||||||
|
|
||||||
|
def to_ascii(data):
    """Flip *data* between text and bytes, silently dropping non-ASCII content.

    The return type depends on the input type:

    * ``bytes`` -> ``str``, decoded as ASCII (undecodable bytes are dropped)
    * ``str``   -> ``bytes``, encoded as ASCII (unencodable chars are dropped)
    * anything else -> ``str(data)`` encoded as ASCII ``bytes``
    """
    if isinstance(data, bytes):
        return data.decode('ascii', errors='ignore')

    # str is encoded as-is; every other object goes through str() first.
    text = data if isinstance(data, str) else str(data)
    return text.encode('ascii', errors='ignore')
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(html.parser.HTMLParser):
    """Fetch *url* and extract the text of its first ``<title>`` element.

    The download and the parse both happen in the constructor; afterwards
    ``title`` holds the whitespace-normalised title text, or ``None`` when
    the page could not be fetched or contains no title text.

    Attributes:
        title: Title text collected so far, or None.
        rec: True while the parser is inside the ``<title>`` element.
    """

    def __init__(self, url, timeout=30, max_bytes=65536):
        """Download *url* and feed its content to the parser.

        Args:
            url: Address of the page to inspect.
            timeout: Seconds to wait on the network before giving up, so an
                unresponsive server cannot block the caller indefinitely.
            max_bytes: Upper bound on how much of the response body is read,
                so a link to a huge file is not downloaded whole. ``None``
                reads the full body.

        Fetch failures (HTTP errors, unreachable hosts, timeouts, malformed
        URLs) are swallowed on purpose: ``title`` simply stays ``None``.
        """
        super().__init__()
        self.title = None
        self.rec = False
        self._title_parts = []    # text chunks seen inside <title>
        self._title_done = False  # set once the first <title> was closed

        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(url, timeout=timeout) as response:
                raw = response.read(max_bytes)
        except (OSError, ValueError):
            # OSError covers urllib.error.URLError / HTTPError and socket
            # timeouts; ValueError is raised for URLs urllib cannot parse.
            return

        # to_ascii() turns the raw bytes into str and drops non-ASCII bytes.
        self.feed(to_ascii(raw))

    def error(self, *_args, **_kwargs):
        """Ignore parser error reports.

        Defined as a method (not assigned after parsing) so that it is
        already in effect during the ``feed()`` call made by ``__init__``.
        NOTE(review): only relevant on Python versions whose HTMLParser
        still calls ``self.error()``; harmless elsewhere.
        """
        return None

    def handle_starttag(self, tag, attrs):
        """Start recording text when the first ``<title>`` opens."""
        # Later <title> elements (e.g. inside inline SVG) are ignored so
        # they cannot replace the document title.
        if tag == 'title' and not self._title_done:
            self.rec = True

    def handle_data(self, data):
        """Accumulate text that appears inside the ``<title>`` element."""
        if self.rec:
            # The parser may deliver one title in several chunks; join them
            # and collapse whitespace/newlines into single spaces.
            self._title_parts.append(data)
            self.title = ' '.join(''.join(self._title_parts).split())

    def handle_endtag(self, tag):
        """Stop recording once the ``<title>`` element is closed."""
        if tag == 'title' and self.rec:
            self.rec = False
            self._title_done = True
|
||||||
|
|
||||||
|
|
||||||
|
def get_title(url):
    """Return the page title of *url*, or a fallback message.

    Args:
        url: Address of the page whose ``<title>`` should be looked up.

    Returns:
        The title found by ``Parser``, or the string
        "URL doesn’t have a title" when no (non-empty) title is available.
    """
    # Constructing Parser performs the HTTP request and the parse, so build
    # it exactly once instead of once for the check and once for the result.
    title = Parser(url).title
    if title:
        return title
    return "URL doesn’t have a title"
|
||||||
|
|
||||||
|
|
||||||
def say(s, buffer=''):
    """Print *s* to *buffer* as a normal message prefixed by the script nick.

    The nick and the text are separated by a tab character in the string
    handed to ``weechat.prnt``.
    """
    line = '%s\t%s' % (script_nick, s)
    weechat.prnt(buffer, line)
|
||||||
|
|
||||||
def unescape(s):
    """Unescape HTML entities.

    Converts named and numeric character references in *s* (for example
    ``&amp;`` or ``&#39;``) to the characters they stand for.

    Args:
        s: Text that may contain HTML character references.

    Returns:
        The text with all character references replaced.
    """
    # Python 3's html.parser.HTMLParser takes no positional arguments and
    # has no save_bgn()/save_end(); html.unescape() is the stdlib
    # replacement for that Python 2 htmllib idiom.
    return html.unescape(s)
|
||||||
|
@ -170,7 +228,7 @@ def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message
|
||||||
ignorelist = w.config_get_plugin('url_ignore').split(',')
|
ignorelist = w.config_get_plugin('url_ignore').split(',')
|
||||||
for url in urlRe.findall(message):
|
for url in urlRe.findall(message):
|
||||||
|
|
||||||
url = quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
|
url = urllib.parse.quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
|
||||||
ignore = False
|
ignore = False
|
||||||
for ignore_part in ignorelist:
|
for ignore_part in ignorelist:
|
||||||
if ignore_part.strip():
|
if ignore_part.strip():
|
||||||
|
@ -199,59 +257,46 @@ def url_process_launcher():
|
||||||
if not url_d: # empty dict means not launched
|
if not url_d: # empty dict means not launched
|
||||||
url_d['launched'] = now()
|
url_d['launched'] = now()
|
||||||
|
|
||||||
# Read 8192
|
title = get_title(url)
|
||||||
python2_bin = w.info_get("python2_bin", "") or "python"
|
|
||||||
cmd = python2_bin + " -c \"import urllib2; opener = urllib2.build_opener();"
|
|
||||||
cmd += "opener.addheaders = [('User-agent','%s')];" % user_agent
|
|
||||||
cmd += "print opener.open('%s').read(8192)\"" % url
|
|
||||||
|
|
||||||
url_d['stdout'] = ''
|
url_d['stdout'] = ''
|
||||||
url_d['url_hook_process'] = w.hook_process(cmd, 30 * 1000, "url_process_cb", "")
|
url_d['url_hook_process'] = w.hook_process(title, 30 * 1000, "title_process_cb", "")
|
||||||
|
|
||||||
return w.WEECHAT_RC_OK
|
return w.WEECHAT_RC_OK
|
||||||
|
|
||||||
def title_process_cb(data, title, rc, stdout, stderr):
    """ Announce a page title in the buffer named by ``buffer_name``.

    The title is shortened to the ``title_max_length`` plugin option,
    wrapped in the configured ``prefix``/``suffix`` and then either sent to
    the channel (``/msg`` or ``/notice``) or printed locally.

    ``data``, ``rc``, ``stdout`` and ``stderr`` are accepted but not used.
    Always returns ``w.WEECHAT_RC_OK``.
    """

    global buffer_name, urls

    limit = int(w.config_get_plugin('title_max_length'))
    if len(title) > limit:
        title = "%s [...]" % title[:limit]

    # buffer_name is split at its first dot: the part before it is used as
    # the server, everything after it as the message target.
    server, _, target = buffer_name.partition('.')
    output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')

    if w.config_get_plugin('announce_public') != 'on':
        # Public announcing disabled: only print into the local buffer.
        say(output, w.buffer_search('', buffer_name))
        return w.WEECHAT_RC_OK

    announced = False
    # Each option holds a comma separated list of buffer names; a match
    # sends the output with the command template paired with that option.
    routes = (
        ('buffers', '/msg -server %s %s %s'),
        ('buffers_notice', '/notice -server %s %s %s'),
    )
    for option, template in routes:
        for candidate in w.config_get_plugin(option).split(','):
            if candidate.lower() == buffer_name.lower():
                w.command('', template % (server, target, output))
                announced = True

    if not announced:
        # Buffer is in neither list: fall back to a local print.
        say(output, w.buffer_search('', buffer_name))

    return w.WEECHAT_RC_OK
|
||||||
|
|
||||||
|
|
||||||
def purge_cb(*args):
|
def purge_cb(*args):
|
||||||
''' Purge the url list on configured intervals '''
|
''' Purge the url list on configured intervals '''
|
||||||
|
|
||||||
|
@ -299,7 +344,7 @@ if __name__ == "__main__":
|
||||||
SCRIPT_DESC, "", ""):
|
SCRIPT_DESC, "", ""):
|
||||||
|
|
||||||
# Set default settings
|
# Set default settings
|
||||||
for option, default_value in settings.iteritems():
|
for option, default_value in settings.items():
|
||||||
if not w.config_is_set_plugin(option):
|
if not w.config_is_set_plugin(option):
|
||||||
w.config_set_plugin(option, default_value)
|
w.config_set_plugin(option, default_value)
|
||||||
ignore_buffers = Ignores('ignore_buffers')
|
ignore_buffers = Ignores('ignore_buffers')
|
||||||
|
|
Loading…
Reference in a new issue