Adapt to py3

2020-07-13 01:05:17 +02:00 · 2020-07-13 01:05:17 +02:00 · 39d5f5762c
parent 342248b05a
commit 39d5f5762c
1 changed files with 91 additions and 46 deletions
--- a/announce_url_title.py
+++ b/announce_url_title.py
@ -83,14 +83,14 @@
 import weechat
 w = weechat
 import re
-import htmllib
+import html.parser
 from time import time as now
 from fnmatch import fnmatch
-from urllib import quote
+import urllib.request
 SCRIPT_NAME    = "announce_url_title"
 SCRIPT_AUTHOR  = "xt <xt@bash.no>"
-SCRIPT_VERSION = "18"
+SCRIPT_VERSION = "19"
 SCRIPT_LICENSE = "GPL3"
 SCRIPT_DESC    = "Announce URL titles to channel or locally"
@ -121,13 +121,71 @@ buffer_name = ''
 # Per-URL bookkeeping: code further down indexes this as urls[url]['stdout'],
 # i.e. one dict per URL.
 urls = {}
 # Nick-style prefix that say() puts in front of every line it prints.
 script_nick = 'url'
 def error_callback(*_, **__):
    """Accept any arguments and do nothing.

    Assigned to ``Parser`` instances as their ``error`` attribute.
    """
    return None
 def is_string(data):
    """Tell whether *data* is a ``str`` instance."""
    text_type = str
    return isinstance(data, text_type)
 def is_bytes(data):
    """Tell whether *data* is a ``bytes`` instance."""
    binary_type = bytes
    return isinstance(data, binary_type)
 def to_ascii(data):
    """Convert *data* across the text/bytes boundary, keeping ASCII only.

    * ``bytes`` input is decoded and returned as ``str``.
    * ``str`` input is encoded and returned as ``bytes``.
    * Any other object is passed through ``str()`` and then encoded,
      so it is returned as ``bytes`` too.

    Anything outside the ASCII range is silently dropped.
    """
    if is_bytes(data):
        return data.decode('ascii', errors='ignore')
    if not is_string(data):
        # Neither text nor bytes: fall back to the object's str() form.
        data = str(data)
    return data.encode('ascii', errors='ignore')
 class Parser(html.parser.HTMLParser):
    """HTML parser that downloads a page and records its title.

    Constructing ``Parser(url)`` fetches *url* and parses the response.
    Afterwards ``title`` holds the text of the first non-empty ``<title>``
    element, or ``None`` when the page could not be fetched or has no
    title.  ``rec`` is the internal "currently inside the title" flag.
    """

    def __init__(self, url):
        """Fetch *url* and feed the response body to the parser.

        Download errors (``URLError``, including ``HTTPError``) and
        ``ValueError`` are swallowed; ``title`` is then left as ``None``.
        """
        self.title = None
        self.rec = False
        html.parser.HTMLParser.__init__(self)
        # Install the no-op error handler before parsing so that it is in
        # place while feed() runs and also when the download fails.
        self.error = error_callback
        try:
            # The context manager closes the connection once the body has
            # been read, instead of leaving it to the garbage collector.
            with urllib.request.urlopen(url) as response:
                body = response.read()
            self.feed(to_ascii(body))
        except (urllib.error.URLError, ValueError):
            # HTTPError is a subclass of URLError, so one clause covers
            # both; ValueError is kept from the original handler list.
            pass
        self.rec = False

    def handle_starttag(self, tag, attrs):
        """Start recording when the first ``<title>`` element opens."""
        # Once a title has been captured, later <title> elements (for
        # example inside inline SVG) must not replace it.
        if tag == 'title' and self.title is None:
            self.rec = True

    def handle_data(self, data):
        """Append text found inside the title element to ``title``."""
        if self.rec:
            # The parser may hand over one element's text in several
            # chunks (e.g. around a stray '<'), so accumulate them.
            self.title = (self.title or '') + data

    def handle_endtag(self, tag):
        """Stop recording when the title element closes."""
        if tag == 'title':
            self.rec = False
 def get_title(url):
    """Return the title of the page at *url*.

    The page is fetched and parsed exactly once.  When the parser found
    no title (or an empty one), a fixed placeholder string is returned
    instead.
    """
    # Build the parser a single time: constructing it performs the
    # download, so doing it twice would request the page twice.
    title = Parser(url).title
    if title:
        return title
    return "URL doesn’t have a title"
 def say(s, buffer=''):
    """Print *s* to *buffer*, prefixed with the script nick.

    The nick and the message are joined with a tab before being handed
    to ``weechat.prnt``.
    """
    line = '%s\t%s' % (script_nick, s)
    weechat.prnt(buffer, line)
 def unescape(s):
    """Unescape HTML entities.

    Returns *s* with named and numeric character references (such as
    ``&amp;`` or ``&#39;``) replaced by the characters they stand for.
    """
    # Python 3's html.parser.HTMLParser has no save_bgn()/save_end() and
    # takes no positional argument, so the htmllib-style body cannot run;
    # html.unescape() is the standard-library replacement.  The ``html``
    # name is already bound by the file's ``import html.parser``.
    return html.unescape(s)
@ -170,7 +228,7 @@ def url_print_cb(data, buffer, time, tags, displayed, highlight, prefix, message
    ignorelist = w.config_get_plugin('url_ignore').split(',')
    for url in urlRe.findall(message):
-        url = quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
+        url = urllib.parse.quote(url, "%/:=&?~#+!$,;@()*[]") # Escape URL
        ignore = False
        for ignore_part in ignorelist:
            if ignore_part.strip():
@ -199,59 +257,46 @@ def url_process_launcher():
        if not url_d: # empty dict means not launched
            url_d['launched'] = now()
-            # Read 8192
+            title = get_title(url)
            python2_bin = w.info_get("python2_bin", "") or "python"
            cmd = python2_bin + " -c \"import urllib2; opener = urllib2.build_opener();"
            cmd += "opener.addheaders = [('User-agent','%s')];" % user_agent
            cmd += "print opener.open('%s').read(8192)\"" % url
            url_d['stdout'] = ''
-            url_d['url_hook_process'] = w.hook_process(cmd, 30 * 1000, "url_process_cb", "")
+            url_d['url_hook_process'] = w.hook_process(title, 30 * 1000, "title_process_cb", "")
    return w.WEECHAT_RC_OK
-def url_process_cb(data, command, rc, stdout, stderr):
+
 def title_process_cb(data, title, rc, stdout, stderr):
    """ Callback parsing html for title """
    global buffer_name, urls
    # NOTE(review): this span is a rendered diff. Lines starting with '-'
    # are the removed implementation, lines starting with '+' are its
    # replacement, and the two are interleaved rather than grouped.
    #
    # What the added ('+') lines do: cut `title` down to the
    # 'title_max_length' plugin option (appending " [...]"), split the global
    # `buffer_name` on '.' into a server part and a buffer part, and wrap the
    # title in the 'prefix' / 'suffix' options.  When 'announce_public' is
    # 'on', the result is sent with /msg to buffers listed in 'buffers' and
    # with /notice to buffers listed in 'buffers_notice'; if no listed buffer
    # matches, or 'announce_public' is not 'on', it is printed locally with
    # say().
    #
    # NOTE(review): the launcher passes the page title as the first argument
    # of w.hook_process, which is how it arrives here as `title`.  That
    # argument was previously a shell command line -- presumably it is
    # executed as a command; verify this is intended before relying on it.
    #
    # NOTE(review): `url` is assigned only on a removed line, yet the
    # trailing `urls[url]['stdout'] = ''` still refers to it.  Confirm that
    # line is absent from the final file; if it is present, `url` is
    # undefined at that point.
-    url = command.split("'")[-2]
+    max_len = int(w.config_get_plugin('title_max_length'))
-    if stdout != "":
+    if len(title) > max_len:
-        urls[url]['stdout'] += stdout
+        title = "%s [...]" % title[0:max_len]
    if int(rc) >= 0:
-        head = re.sub("[\r\n\t ]"," ", urls[url]['stdout'])
+    splits = buffer_name.split('.') #FIXME bad code
-        title = re.search('(?i)\<title\>(.*?)\</title\>', head)
+    server = splits[0]
-        if title:
+    buffer = '.'.join(splits[1:])
-            title = unescape(title.group(1))
+    output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')
-
+    announce_public = w.config_get_plugin('announce_public')
-            max_len = int(w.config_get_plugin('title_max_length'))
+    if announce_public == 'on':
-            if len(title) > max_len:
+        found = False
-                title = "%s [...]" % title[0:max_len]
+        for active_buffer in w.config_get_plugin('buffers').split(','):
-
+            if active_buffer.lower() == buffer_name.lower():
-            splits = buffer_name.split('.') #FIXME bad code
+                w.command('', '/msg -server %s %s %s' %(server, buffer, output))
-            server = splits[0]
+                found = True
-            buffer = '.'.join(splits[1:])
+        for active_buffer in w.config_get_plugin('buffers_notice').split(','):
-            output = w.config_get_plugin('prefix') + title + w.config_get_plugin('suffix')
+            if active_buffer.lower() == buffer_name.lower():
-            announce_public = w.config_get_plugin('announce_public')
+                w.command('', '/notice -server %s %s %s' %(server, buffer, output))
-            if announce_public == 'on':
+                found = True
-                found = False
+        if found == False:
-                for active_buffer in w.config_get_plugin('buffers').split(','):
+            say(output,w.buffer_search('', buffer_name))
-                    if active_buffer.lower() == buffer_name.lower():
+    else:
-                        w.command('', '/msg -server %s %s %s' %(server, buffer, output))
+        say(output,w.buffer_search('', buffer_name))
                        found = True
                for active_buffer in w.config_get_plugin('buffers_notice').split(','):
                    if active_buffer.lower() == buffer_name.lower():
                        w.command('', '/notice -server %s %s %s' %(server, buffer, output))
                        found = True
                if found == False:
                    say(output,w.buffer_search('', buffer_name))
            else:
                say(output,w.buffer_search('', buffer_name))
        urls[url]['stdout'] = ''
    return w.WEECHAT_RC_OK
 def purge_cb(*args):
    ''' Purge the url list on configured intervals '''
@ -299,7 +344,7 @@ if __name__ == "__main__":
                        SCRIPT_DESC, "", ""):
        # Set default settings
-        for option, default_value in settings.iteritems():
+        for option, default_value in settings.items():
            if not w.config_is_set_plugin(option):
                w.config_set_plugin(option, default_value)
        ignore_buffers = Ignores('ignore_buffers')