[releng/abderrahim/downloadsites: 1/3] refactor and partly rewrite convert-to-tarballs



commit 5862b34ba9cac44c99c3971bd8fd1fadb03c4730
Author: Abderrahim Kitouni <akitouni gnome org>
Date:   Mon Apr 6 17:43:30 2020 +0100

    refactor and partly rewrite convert-to-tarballs
    
    This should allow us to add support for different sites (e.g. github)

 tools/smoketesting/convert-to-tarballs.py    | 510 +++++++--------------------
 tools/smoketesting/downloadsites.py          | 155 ++++++++
 tools/smoketesting/tarball-conversion.config |   3 -
 3 files changed, 281 insertions(+), 387 deletions(-)
---
diff --git a/tools/smoketesting/convert-to-tarballs.py b/tools/smoketesting/convert-to-tarballs.py
index 0cb213c..ec3c4c6 100755
--- a/tools/smoketesting/convert-to-tarballs.py
+++ b/tools/smoketesting/convert-to-tarballs.py
@@ -4,6 +4,7 @@
 # Copyright (c) 2007-2009, Olav Vitters
 # Copyright (c) 2006-2009, Vincent Untz
 # Copyright (c) 2017, Codethink Limited
+# Copyright 2020, Abderrahim Kitouni
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -23,90 +24,19 @@
 
 
 import sys
-import re
 import optparse
 import os
-from posixpath import join as posixjoin # Handy for URLs
 from xml.dom import minidom, Node
-from html.parser import HTMLParser
-import requests
-import urllib.parse
-import socket
 from ruamel import yaml
 from collections import defaultdict
+from tqdm import tqdm
 
-have_sftp = False
-try:
-    import paramiko
-
-    have_sftp = True
-except: pass
-
-# utility functions
-def get_links(html):
-    class urllister(HTMLParser):
-        def reset(self):
-            HTMLParser.reset(self)
-            self.urls = []
-
-        def handle_starttag(self, tag, attrs):
-            if tag == 'a':
-                href = [v for k, v in attrs if k=='href']
-                if href:
-                    self.urls.extend(href)
-
-    parser = urllister()
-    parser.feed(html)
-    parser.close()
-    return parser.urls
-
-def get_latest_version(versions, max_version):
-    def bigger_version(a, b):
-        a_nums = a.split('.')
-        b_nums = b.split('.')
-        num_fields = min(len(a_nums), len(b_nums))
-        for i in range(0,num_fields):
-            if   int(a_nums[i]) > int(b_nums[i]):
-                return a
-            elif int(a_nums[i]) < int(b_nums[i]):
-                return b
-        if len(a_nums) > len(b_nums):
-            return a
-        else:
-            return b
-
-    # This is nearly the same as _bigger_version, except that
-    #   - It returns a boolean value
-    #   - If max_version is None, it just returns False
-    #   - It treats 2.13 as == 2.13.0 instead of 2.13 as < 2.13.0
-    # The second property is particularly important with directory hierarchies
-    def version_greater_or_equal_to_max(a, max_version):
-        if not max_version:
-            return False
-        a_nums = a.split('.')
-        b_nums = max_version.split('.')
-        num_fields = min(len(a_nums), len(b_nums))
-        for i in range(num_fields):
-            if   int(a_nums[i]) > int(b_nums[i]):
-                return True
-            elif int(a_nums[i]) < int(b_nums[i]):
-                return False
-        return True
-
-    biggest = None
-    versions = [ v.rstrip(os.path.sep) for v in versions ]
-
-    for version in versions:
-        if ((biggest is None or version == bigger_version(biggest, version)) and \
-            not version_greater_or_equal_to_max(version, max_version)):
-            biggest = version
-    return biggest
+from downloadsites import SITE_KINDS
 
 
 class Options:
     def __init__(self, filename):
         self.filename = filename
-        self.mirrors = {}
         self.release_sets = defaultdict(list)
         self.version_limit = {}
         self.real_name = {}
@@ -114,48 +44,41 @@ class Options:
         self.module_locations = []
         self._read_conversion_info()
 
-    def get_download_site(self, modulename):
-        for module, location in self.module_locations:
-            if module == modulename:
-                return location.format(module=modulename)
+    def get_module_info(self, modulename):
+        realname = self.real_name.get(modulename, modulename)
+        limit = self.version_limit.get(modulename, None)
 
-        if self.default_site:
-            return self.default_site.format(module=modulename)
+        for module, site in self.module_locations:
+            if module == realname:
+                break
+        else:
+            site = self.default_site
 
-        raise IOError('No download site found!\n')
+        return realname, limit, site
 
     def _get_locations(self, locations_node):
         for node in locations_node.childNodes:
             if node.nodeType != Node.ELEMENT_NODE:
                 continue
             if node.nodeName == 'site':
+                if node.attributes.get('kind'):
+                    kind = node.attributes.get('kind').nodeValue
+                else:
+                    kind = 'tarballs'
+
                 location = node.attributes.get('location').nodeValue
+                site = SITE_KINDS[kind](location)
+
                 if node.attributes.get('default') is not None:
                     assert self.default_site is None, "only one default site can be specified"
-                    self.default_site = location
+                    self.default_site = site
                 elif node.attributes.get('module') is not None:
                     module = node.attributes.get('module').nodeValue
-                    self.module_locations.append([module, location])
+                    self.module_locations.append([module, site])
             else:
                 sys.stderr.write('Bad location node\n')
                 sys.exit(1)
 
-    def _get_mirrors(self, mirrors_node):
-        for node in mirrors_node.childNodes:
-            if node.nodeType != Node.ELEMENT_NODE:
-                continue
-            if node.nodeName == 'mirror':
-                old = node.attributes.get('location').nodeValue
-                new = node.attributes.get('alternate').nodeValue
-                u = urllib.parse.urlparse(old)
-
-                # Only add the mirror if we don't have one
-                if (u.scheme, u.hostname) not in self.mirrors:
-                    self.mirrors[(u.scheme, u.hostname)] = (old, new)
-            else:
-                sys.stderr.write('Bad mirror node\n')
-                sys.exit(1)
-
     def _get_modulelist(self, modulelist_node):
         for node in modulelist_node.childNodes:
             if node.nodeType != Node.ELEMENT_NODE:
@@ -183,12 +106,6 @@ class Options:
                 sys.stderr.write('Bad whitelist node\n')
                 sys.exit(1)
 
-    def get_version_limit(self, modulename):
-        return self.version_limit.get(modulename, None)
-
-    def get_real_name(self, modulename):
-        return self.real_name.get(modulename, modulename)
-
     def _read_conversion_info(self):
         document = minidom.parse(self.filename)
         conversion_stuff = document.documentElement
@@ -197,8 +114,6 @@ class Options:
                 continue
             if node.nodeName == 'locations':
                 self._get_locations(node)
-            elif node.nodeName == 'mirrors':
-                self._get_mirrors(node)
             elif node.nodeName == 'whitelist':
                 self._get_modulelist(node)
             else:
@@ -206,296 +121,123 @@ class Options:
                                  node.nodeName + '\n')
                 sys.exit(1)
 
-class TarballLocator:
-    def __init__(self, mirrors):
-        self.have_sftp = self._test_sftp()
-        self.cache = {}
-        for key in list(mirrors.keys()):
-            mirror = mirrors[key]
-            if mirror[1].startswith('sftp://'):
-                hostname = urllib.parse.urlparse(mirror[1]).hostname
-                if not self.have_sftp or not self._test_sftp_host(hostname):
-                    sys.stderr.write("WARNING: Removing sftp mirror %s due to non-working sftp setup\n" % mirror[1])
-                    del(mirrors[key])
-        self.mirrors = mirrors
-
-    def cleanup(self):
-        """Clean connection cache, close any connections"""
-        if 'sftp' in self.cache:
-            for connection in self.cache['sftp'].values():
-                connection.sock.get_transport().close()
-
-    def _test_sftp(self):
-        """Perform a best effort guess to determine if sftp is available"""
-        global have_sftp
-        if not have_sftp: return False
-
-        try:
-            self.sftp_cfg = paramiko.SSHConfig()
-            self.sftp_cfg.parse(open(os.path.expanduser('~/.ssh/config'), 'r'))
-
-            self.sftp_keys = paramiko.Agent().get_keys()
-            if not len(self.sftp_keys): raise KeyError('no sftp_keys')
-
-            self.sftp_hosts = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
-        except:
-            have_sftp = False
-
-        return have_sftp
-
-    def _test_sftp_host(self, hostname):
-        do_sftp = True
-        try:
-            cfg = self.sftp_cfg.lookup(hostname)
-            cfg.get('user') # require a username to be defined
-            if not hostname in self.sftp_hosts: raise KeyError('unknown hostname')
-        except KeyError:
-            do_sftp = False
-
-        return do_sftp
-
-    def _get_files_from_sftp(self, parsed_url, max_version):
-        hostname = parsed_url.hostname
-
-        if hostname in self.cache.setdefault('sftp', {}):
-            sftp = self.cache['sftp'][hostname]
-        else:
-            hostkeytype = list(self.sftp_hosts[hostname].keys())[0]
-            hostkey = self.sftp_hosts[hostname][hostkeytype]
-            cfg = self.sftp_cfg.lookup(hostname)
-            hostname = cfg.get('hostname', hostname).replace('%h', hostname)
-            port = parsed_url.port or cfg.get('port', 22)
-            username = parsed_url.username or cfg.get('user')
-
-            t = paramiko.Transport((hostname, port))
-            t.sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
-            t.connect(hostkey=hostkey)
-
-            for key in self.sftp_keys:
-                try:
-                    t.auth_publickey(username, key)
-                    break
-                except paramiko.SSHException:
-                    pass
-
-            if not t.is_authenticated():
-                t.close()
-                sys.stderr('ERROR: Cannot authenticate to %s' % hostname)
-                sys.exit(1)
-
-            sftp = paramiko.SFTPClient.from_transport(t)
-            self.cache['sftp'][hostname] = sftp
-
-        path = parsed_url.path
-        good_dir = re.compile('^([0-9]+\.)*[0-9]+$')
-        def hasdirs(x): return good_dir.search(x)
-        while True:
-            files = sftp.listdir(path)
-
-            newdirs = list(filter(hasdirs, files))
-            if newdirs:
-                newdir = get_latest_version(newdirs, max_version)
-                path = posixjoin(path, newdir)
-            else:
-                break
-
-        newloc = list(parsed_url)
-        newloc[2] = path
-        location = urllib.parse.urlunparse(newloc)
-        return location, files
-
-    def _get_files_from_http(self, parsed_url, max_version):
-        good_dir = re.compile('^([0-9]+\.)*[0-9]+/?$')
-        def hasdirs(x): return good_dir.search(x)
-        def fixdirs(x): return re.sub(r'^([0-9]+\.[0-9]+)/?$', r'\1', x)
-        location = urllib.parse.urlunparse(parsed_url)
-        # Follow 302 codes when retrieving URLs, speeds up conversion by 60sec
-        redirect_location = location
-        while True:
-            req = requests.get(redirect_location)
-            files = get_links(req.text)
-
-            # Check to see if we need to descend to a subdirectory
-            newdirs = [fixdirs(dir) for dir in files if hasdirs(dir)]
-            if newdirs:
-                newdir = get_latest_version(newdirs, max_version)
-                redirect_location = posixjoin(req.url, newdir, "")
-                location = posixjoin(location, newdir, "")
-            else:
-                break
-        return location, files
-
-    _get_files_from_https = _get_files_from_http
-
-    def find_tarball(self, baselocation, modulename, max_version):
-        print("LOOKING for " + modulename + " tarball at " + baselocation)
-        u = urllib.parse.urlparse(baselocation)
-
-        mirror = self.mirrors.get((u.scheme, u.hostname), None)
-        if mirror:
-            baselocation = baselocation.replace(mirror[0], mirror[1], 1)
-            u = urllib.parse.urlparse(baselocation)
-
-        # Determine which function handles the actual retrieving
-        locator = getattr(self, '_get_files_from_%s' % u.scheme, None)
-        if locator is None:
-            sys.stderr.write('Invalid location for ' + modulename + ': ' +
-                             baselocation + '\n')
-            sys.exit(1)
-
-        location, files = locator(u, max_version)
-        files = files or []
-        
-        basenames = set()
-        tarballs = []
-        if location.find("ftp.debian.org") != -1:
-            extensions = [
-                '.tar.xz',
-                '.tar.bz2',
-                '.tar.gz',
-            ]
-        else:
-            extensions = [
-                '.tar.xz',
-                'orig.tar.bz2',
-                '.tar.bz2',
-                'orig.tar.gz',
-                '.tar.gz',
-            ]
-
-
-        # Has to be checked by extension first; we prefer .tar.xz over .tar.bz2 and .tar.gz
-        for ext in extensions:
-            for file in files:
-                basename = file[:-len(ext)] # only valid when file ends with ext
-                if file.endswith(ext) and basename not in basenames:
-                    basenames.add(basename)
-                    tarballs.append(file)
-
-        # Only include tarballs for the given module
-        tarballs = [tarball for tarball in tarballs if modulename in tarball]
-
-        re_tarball = r'^'+re.escape(modulename)+'[_-](([0-9]+[\.\-])*[0-9]+)(\.orig)?\.tar.*$'
-        ## Don't include -beta -installer -stub-installer and all kinds of
-        ## other stupid inane tarballs, and also filter tarballs that have a
-        ## name that includes the module name but is different (eg, Gnome2-VFS
-        ## instead of Gnome2)
-        tarballs = [t for t in tarballs if re.search(re_tarball, t)]
-
-        versions = [re.sub(re_tarball, r'\1', t) for t in tarballs]
-
-        if not len(versions):
-            raise IOError('No versions found')
-        version = get_latest_version(versions, max_version)
-        index = versions.index(version)
-
-        location = posixjoin(location, tarballs[index])
-        if mirror: # XXX - doesn't undo everything -- not needed
-            location = location.replace(mirror[1], mirror[0], 1)
-
-        return location, version
-
 
 class ConvertToTarballs:
-    def __init__(self, options, locator, directory, convert=True):
+    def __init__(self, options, directory, convert=True, refs=False):
         self.options = options
-        self.locator = locator
         self.convert = convert
+        self.refs = refs
 
         self.all_tarballs = []
         self.all_versions = []
 
-        self.ignored_tarballs = []
+        self.errors = []
+        self.warnings = []
 
         with open(os.path.join(directory, 'project.conf')) as f:
             projectconf = yaml.safe_load(f)
             self.aliases = projectconf['aliases']
 
-    def find_tarball_by_name(self, name):
-        real_name = self.options.get_real_name(name)
-        max_version = self.options.get_version_limit(name)
-        baselocation = self.options.get_download_site(real_name)
+    def _get_module_kind(self, element):
+        if 'sources' not in element:
+            return 'skip'
 
-        # Ask the locator to hunt down a tarball
-        location, version = self.locator.find_tarball(baselocation, real_name, max_version)
-        # Save the versions
-        self.all_tarballs.append(name)
-        self.all_versions.append(version)
+        kind = element['sources'][0]['kind']
 
-        return location, version
+        if kind == 'local':
+            return 'skip'
+        elif kind.startswith('git'):
+            return 'git'
 
-    def write_bst_file(self, fullpath, element, location):
-        # Replace the first source with a tarball
-        element['sources'][0]['kind'] = 'tar'
-        element['sources'][0]['url'] = location
+        assert kind in ('tar', 'zip'), 'unexpected source kind {}'.format(kind)
+        return 'tarball'
 
-        if 'submodules' in element['sources'][0]:
-            del element['sources'][0]['submodules']
+    def _convert_one_module(self, name, fatal):
+        errors = self.errors if fatal else self.warnings
+        real_name, max_version, site = self.options.get_module_info(name)
 
-        # we may not have track if we are updating for a stable release
-        if 'track' in element['sources'][0]:
-            del element['sources'][0]['track']
+        if site.modules and real_name not in site.modules:
+            errors.append(name)
+            return None, None
 
-        # cargo sources shouldn't be needed in tarballs as tarballs should
-        # vendor their dependencies
-        element['sources'] = [source for source in element['sources'] if source['kind'] != 'cargo']
+        location, version, checksum = site.find_tarball(real_name, max_version, self.refs)
+
+        if None in (location, version):
+            errors.append(name)
+        else:
+            self.all_tarballs.append(name)
+            self.all_versions.append(version)
+
+        return location, checksum
+
+    def _write_bst_file(self, fullpath, element, location, checksum):
+        for alias, url in self.aliases.items():
+            if location.startswith(url):
+                location = alias + ':' + location[len(url):]
+
+        if self._get_module_kind(element) == 'git':
+            # Replace the first source with a tarball
+            element['sources'][0] = { 'kind': 'tar', 'url': location}
+            if checksum:
+                element['sources'][0]['ref'] = checksum
+
+            # cargo sources shouldn't be needed in tarballs as tarballs should
+            # vendor their dependencies
+            element['sources'] = [source for source in element['sources'] if source['kind'] != 'cargo']
+        elif element['sources'][0]['url'] == location:
+            # only change existing tarballs if the url changed. this allows us to invalidate the
+            # ref if we don't have a new one
+            return
+        else:
+            element['sources'][0]['url'] = location
+            if checksum or 'ref' in element['sources'][0]:
+                element['sources'][0]['ref'] = checksum
 
         # Dump it now
         with open(fullpath, 'w') as f:
             yaml.round_trip_dump(element, f)
 
-    def process_one_file(self, dirname, basename):
-        module_name = re.sub('\.bst$', '', basename)
-        fullpath = os.path.join(dirname, basename)
-
-        element = None
-        with open(fullpath) as f:
-            try:
-                element = yaml.load(f, yaml.loader.RoundTripLoader)
-            except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
-                raise Exception("Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark))
+    def print_errors(self):
+        print("\033[91mErrors:\033[0m") # in red
+        for module in self.errors:
+            print("- Can't find tarball for module '{}'".format(module))
 
-        if element.get('kind', None) == 'stack':
-            print("IGNORE stack element {}".format(basename))
-            return
+    def print_warnings(self):
+        print("Warnings:")
+        for module in self.warnings:
+            print("- Can't update tarball for module '{}'".format(module))
 
-        sources = element.get('sources', [])
-        if not sources:
-            print("IGNORE element without sources {}".format(basename))
-            return
+    def convert_modules(self, directories):
+        to_convert = []
+        to_update = []
 
-        kind = sources[0].get('kind', None)
-        if kind == 'local':
-            print("IGNORE element with only local sources {}".format(basename))
-            return
+        for directory in directories:
+            for filename in os.listdir(directory):
+                if not filename.endswith('.bst'):
+                    continue
 
-        if not self.convert and kind.startswith('git'):
-            print("IGNORE git element {} (not converting)".format(basename))
-            return
-
-        try:
-            print("REWRITE {}".format(basename))
-            location, version = self.find_tarball_by_name(module_name)
+                name = filename[:-len('.bst')]
+                fullpath = os.path.join(directory, filename)
 
-            for alias, url in self.aliases.items():
-                if location.startswith(url):
-                    location = alias + ':' + location[len(url):]
+                with open(fullpath) as f:
+                    element = yaml.round_trip_load(f)
 
-            self.write_bst_file(fullpath, element, location)
+                module_kind = self._get_module_kind(element)
+                if module_kind == 'git':
+                    to_convert.append((name, fullpath, element))
+                elif module_kind == 'tarball':
+                    to_update.append((name, fullpath, element))
 
-        except IOError:
-            if kind == 'tar' or kind == 'zip':
-                print('IGNORE: Could not find site for ' + module_name)
-                self.ignored_tarballs.append(module_name)
-            else:
-                print('FATAL: Could not find site for ' + module_name)
-                sys.exit(1)
+        if self.convert and to_convert:
+            for name, fullpath, element in tqdm(to_convert, 'Converting git repos', unit=''):
+                location, checksum = self._convert_one_module(name, True)
+                if location:
+                    self._write_bst_file(fullpath, element, location, checksum)
 
-    def process_bst_files(self, directory):
-        for root, dirs, files in os.walk(directory):
-            for name in files:
-                if name.endswith(".bst"):
-                    self.process_one_file(root, name)
+        for name, fullpath, element in tqdm(to_update, 'Updating existing tarballs', unit=''):
+            location, checksum = self._convert_one_module(name, False)
+            if location:
+                self._write_bst_file(fullpath, element, location, checksum)
 
     def create_versions_file(self):
         versions = []
@@ -507,7 +249,7 @@ class ConvertToTarballs:
             versions.append('## %s\n' % release_set.upper())
 
             for module in sorted(modules):
-                real_module = self.options.get_real_name(module)
+                real_module, _, _ = self.options.get_module_info(module)
                 index = self.all_tarballs.index(module)
                 version = self.all_versions[index]
 
@@ -572,25 +314,30 @@ def main(args):
         parser.print_help()
         sys.exit(1)
 
-    locator = TarballLocator(config.mirrors)
-    convert = ConvertToTarballs(config, locator, options.directory, options.convert)
-    convert.process_bst_files(os.path.join(options.directory, 'elements', 'core-deps'))
-    convert.process_bst_files(os.path.join(options.directory, 'elements', 'core'))
-    convert.process_bst_files(os.path.join(options.directory, 'elements', 'sdk'))
+    if int(splitted_version[1]) % 2 == 0:
+        flatpak_branch = '{}.{}'.format(splitted_version[0], splitted_version[1])
+        update_flatpak_branch = True
+    elif int(splitted_version[2]) >= 90:
+        flatpak_branch = '{}.{}beta'.format(splitted_version[0], int(splitted_version[1]) + 1)
+        update_flatpak_branch = True
+    else:
+        update_flatpak_branch = False
+
+    convert = ConvertToTarballs(config, options.directory, options.convert, update_flatpak_branch)
+    convert.convert_modules([os.path.join(options.directory, 'elements', directory)
+                             for directory in ('core-deps', 'core', 'sdk')])
+
+    if convert.errors:
+        convert.print_errors()
+        exit(1)
+
+    if convert.warnings:
+        convert.print_warnings()
 
     if options.convert:
         convert.create_versions_file()
 
         # update variables in the .gitlab-ci.yml
-        if int(splitted_version[1]) % 2 == 0:
-               flatpak_branch = '{}.{}'.format(splitted_version[0], splitted_version[1])
-               update_flatpak_branch = True
-        elif int(splitted_version[2]) >= 90:
-               flatpak_branch = '{}.{}beta'.format(splitted_version[0], int(splitted_version[1]) + 1)
-               update_flatpak_branch = True
-        else:
-               update_flatpak_branch = False
-
         if update_flatpak_branch:
             cifile = os.path.join(options.directory, '.gitlab-ci.yml')
             with open(cifile) as f:
@@ -637,11 +384,6 @@ def main(args):
 
                 os.unlink(junctionrefs)
 
-    if convert.ignored_tarballs:
-        print("Could not find a download site for the following modules:")
-        for module_name in convert.ignored_tarballs:
-            print("- {}".format(module_name))
-
 if __name__ == '__main__':
     try:
       main(sys.argv)
diff --git a/tools/smoketesting/downloadsites.py b/tools/smoketesting/downloadsites.py
new file mode 100644
index 0000000..68309d7
--- /dev/null
+++ b/tools/smoketesting/downloadsites.py
@@ -0,0 +1,155 @@
+# Copyright 2020, Abderrahim Kitouni
+#
+# based on code from convert-to-tarballs.py
+#
+# Copyright 2005-2008, Elijah Newren
+# Copyright 2007-2009, Olav Vitters
+# Copyright 2006-2009, Vincent Untz
+# Copyright 2017, Codethink Limited
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
+# USA
+
+import re
+import requests
+import os
+
+from html.parser import HTMLParser
+from posixpath import join as posixjoin # Handy for URLs
+
+# Classes that define the different types of sites we can download from
+class DownloadSite:
+    def __init__(self, baseurl):
+        self.modules = set()
+        self.baseurl = baseurl
+    def find_tarball(self, modulename, max_version, wantchecksum):
+        raise NotImplementedError
+
+class Tarballs(DownloadSite):
+    def __init__(self, baseurl):
+        super().__init__(baseurl)
+
+    def find_tarball(self, modulename, max_version, wantchecksum):
+        good_dir = re.compile('^([0-9]+\.)*[0-9]+/?$')
+        def hasdirs(x): return good_dir.search(x)
+        def fixdirs(x): return re.sub(r'^([0-9]+\.[0-9]+)/?$', r'\1', x)
+
+        location = self.baseurl.format(module=modulename)
+
+        while True:
+            req = requests.get(location)
+            req.raise_for_status()
+            files = get_links(req.text)
+
+            # Check to see if we need to descend to a subdirectory
+            newdirs = [fixdirs(dir) for dir in files if hasdirs(dir)]
+            if newdirs:
+                assert max_version is None or len(max_version.split('.')) <= 2, "limit can't have micro version when the project uses subdirs"
+                newdir = get_latest_version(newdirs, max_version)
+                location = posixjoin(req.url, newdir, "")
+            else:
+                break
+
+        basenames = set()
+        tarballs = []
+        extensions = ['.tar.xz', '.tar.bz2', '.tar.gz']
+
+        # Has to be checked by extension first; we prefer .tar.xz over .tar.bz2 and .tar.gz
+        for ext in extensions:
+            for file in files:
+                basename = file[:-len(ext)] # only valid when file ends with ext
+                if file.endswith(ext) and basename not in basenames:
+                    basenames.add(basename)
+                    tarballs.append(file)
+
+        re_tarball = r'^'+re.escape(modulename)+'[_-](([0-9]+[\.\-])*[0-9]+)(\.orig)?\.tar.*$'
+
+        tarballs = [t for t in tarballs if re.search(re_tarball, t)]
+        versions = [re.sub(re_tarball, r'\1', t) for t in tarballs]
+
+        if not versions:
+            return None, None, None
+
+        version = get_latest_version(versions, max_version)
+        index = versions.index(version)
+
+        location = posixjoin(location, tarballs[index])
+
+        return location, version, None
+
+# mapping from name to DownloadSite subclasses
+SITE_KINDS = {
+    'tarballs': Tarballs,
+}
+
+# utility functions
+def get_links(html):
+    class urllister(HTMLParser):
+        def reset(self):
+            HTMLParser.reset(self)
+            self.urls = []
+
+        def handle_starttag(self, tag, attrs):
+            if tag == 'a':
+                href = [v for k, v in attrs if k=='href']
+                if href:
+                    self.urls.extend(href)
+
+    parser = urllister()
+    parser.feed(html)
+    parser.close()
+    return parser.urls
+
+def get_latest_version(versions, max_version):
+    def bigger_version(a, b):
+        a_nums = a.split('.')
+        b_nums = b.split('.')
+        num_fields = min(len(a_nums), len(b_nums))
+        for i in range(0,num_fields):
+            if   int(a_nums[i]) > int(b_nums[i]):
+                return a
+            elif int(a_nums[i]) < int(b_nums[i]):
+                return b
+        if len(a_nums) > len(b_nums):
+            return a
+        else:
+            return b
+
+    # This is nearly the same as _bigger_version, except that
+    #   - It returns a boolean value
+    #   - If max_version is None, it just returns False
+    #   - It treats 2.13 as == 2.13.0 instead of 2.13 as < 2.13.0
+    # The second property is particularly important with directory hierarchies
+    def version_greater_or_equal_to_max(a, max_version):
+        if not max_version:
+            return False
+        a_nums = a.split('.')
+        b_nums = max_version.split('.')
+        num_fields = min(len(a_nums), len(b_nums))
+        for i in range(num_fields):
+            if   int(a_nums[i]) > int(b_nums[i]):
+                return True
+            elif int(a_nums[i]) < int(b_nums[i]):
+                return False
+        return True
+
+    biggest = None
+    versions = [ v.rstrip(os.path.sep) for v in versions ]
+
+    for version in versions:
+        if ((biggest is None or version == bigger_version(biggest, version)) and \
+            not version_greater_or_equal_to_max(version, max_version)):
+            biggest = version
+    return biggest
diff --git a/tools/smoketesting/tarball-conversion.config b/tools/smoketesting/tarball-conversion.config
index c08c033..3564dd9 100644
--- a/tools/smoketesting/tarball-conversion.config
+++ b/tools/smoketesting/tarball-conversion.config
@@ -28,9 +28,6 @@
     <site module="udisks"                    location="https://udisks.freedesktop.org/releases/"/>
     <site module="uhttpmock"                 location="https://tecnocode.co.uk/downloads/uhttpmock/"/>
   </locations>
-  <mirrors>
-    <mirror location="https://download.gnome.org/" alternate="sftp://master.gnome.org/ftp/pub/GNOME/" />
-  </mirrors>
   <whitelist>
     ## EXTERNAL
     <package name="ModemManager"/>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]