| 1 | 1 |  #
 | 
| 2 | 2 |  #  Copyright (C) 2016 Codethink Limited
 | 
|  | 3 | +#  Copyright (C) 2019 Bloomberg L.P.
 | 
| 3 | 4 |  #
 | 
| 4 | 5 |  #  This program is free software; you can redistribute it and/or
 | 
| 5 | 6 |  #  modify it under the terms of the GNU Lesser General Public
 | 
| ... | ... | @@ -16,15 +17,37 @@ | 
| 16 | 17 |  #
 | 
| 17 | 18 |  #  Authors:
 | 
| 18 | 19 |  #        Tristan Van Berkom <tristan vanberkom codethink co uk>
 | 
|  | 20 | +#        Daniel Silverstone <daniel silverstone codethink co uk>
 | 
| 19 | 21 |  
 | 
| 20 | 22 |  import re
 | 
|  | 23 | +import sys
 | 
| 21 | 24 |  
 | 
| 22 | 25 |  from ._exceptions import LoadError, LoadErrorReason
 | 
| 23 | 26 |  from . import _yaml
 | 
| 24 | 27 |  
 | 
| 25 | 28 |  # Variables are allowed to have dashes here
 | 
| 26 | 29 |  #
 | 
| 27 |  | -_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
 | 
|  | 30 | +PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 | 
|  | 31 | +
 | 
|  | 32 | +
 | 
|  | 33 | +# Throughout this code you will see variables named things like `expstr`.
 | 
|  | 34 | +# These hold data structures called "expansion strings" and are the parsed
 | 
|  | 35 | +# form of the strings which are the input to this subsystem.  Strings
 | 
|  | 36 | +# such as "Hello %{name}, how are you?" are parsed into the form:
 | 
|  | 37 | +# (3, ["Hello ", "name", ", how are you?"])
 | 
|  | 38 | +# i.e. a tuple of an integer and a list, where the integer is the cached
 | 
|  | 39 | +# length of the list, and the list consists of one or more strings.
 | 
|  | 40 | +# Strings in even indices of the list (0, 2, 4, etc) are constants which
 | 
|  | 41 | +# are copied into the output of the expansion algorithm.  Strings in the
 | 
|  | 42 | +# odd indices (1, 3, 5, etc) are the names of further expansions to make.
 | 
|  | 43 | +# In the example above, first "Hello " is copied, then "name" is expanded
 | 
|  | 44 | +# and so must be another named expansion string passed in to the constructor
 | 
|  | 45 | +# of the Variables class, and whatever is yielded from the expansion of "name"
 | 
|  | 46 | +# is added to the concatenation for the result.  Finally ", how are you?" is
 | 
|  | 47 | +# copied in and the whole lot concatenated for return.
 | 
|  | 48 | +#
 | 
|  | 49 | +# To see how strings are parsed, see `_parse_expstr()` after the class, and
 | 
|  | 50 | +# to see how expansion strings are expanded, see `_expand_expstr()` after that.
 | 
| 28 | 51 |  
 | 
| 29 | 52 |  
 | 
| 30 | 53 |  # The Variables helper object will resolve the variable references in
 | 
| ... | ... | @@ -38,14 +61,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' | 
| 38 | 61 |  #     node (dict): A node loaded and composited with yaml tools
 | 
| 39 | 62 |  #
 | 
| 40 | 63 |  # Raises:
 | 
| 41 |  | -#     LoadError, if unresolved variables occur.
 | 
|  | 64 | +#     LoadError, if unresolved variables, or cycles in resolution, occur.
 | 
| 42 | 65 |  #
 | 
| 43 | 66 |  class Variables():
 | 
| 44 | 67 |  
 | 
| 45 | 68 |      def __init__(self, node):
 | 
| 46 | 69 |  
 | 
| 47 | 70 |          self.original = node
 | 
| 48 |  | -        self.variables = self._resolve(node)
 | 
|  | 71 | +        self._expstr_map = self._resolve(node)
 | 
|  | 72 | +        self.flat = self._flatten()
 | 
| 49 | 73 |  
 | 
| 50 | 74 |      # subst():
 | 
| 51 | 75 |      #
 | 
| ... | ... | @@ -61,139 +85,167 @@ class Variables(): | 
| 61 | 85 |      #    LoadError, if the string contains unresolved variable references.
 | 
| 62 | 86 |      #
 | 
| 63 | 87 |      def subst(self, string):
 | 
| 64 |  | -        substitute, unmatched, _ = self._subst(string, self.variables)
 | 
| 65 |  | -        unmatched = list(set(unmatched))
 | 
| 66 |  | -        if unmatched:
 | 
| 67 |  | -            if len(unmatched) == 1:
 | 
| 68 |  | -                message = "Unresolved variable '{var}'".format(var=unmatched[0])
 | 
| 69 |  | -            else:
 | 
| 70 |  | -                message = "Unresolved variables: "
 | 
| 71 |  | -                for unmatch in unmatched:
 | 
| 72 |  | -                    if unmatched.index(unmatch) > 0:
 | 
| 73 |  | -                        message += ', '
 | 
| 74 |  | -                    message += unmatch
 | 
| 75 |  | -
 | 
| 76 |  | -            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
 | 
| 77 |  | -
 | 
| 78 |  | -        return substitute
 | 
| 79 |  | -
 | 
| 80 |  | -    def _subst(self, string, variables):
 | 
| 81 |  | -
 | 
| 82 |  | -        def subst_callback(match):
 | 
| 83 |  | -            nonlocal variables
 | 
| 84 |  | -            nonlocal unmatched
 | 
| 85 |  | -            nonlocal matched
 | 
| 86 |  | -
 | 
| 87 |  | -            token = match.group(0)
 | 
| 88 |  | -            varname = match.group(1)
 | 
| 89 |  | -
 | 
| 90 |  | -            value = _yaml.node_get(variables, str, varname, default_value=None)
 | 
| 91 |  | -            if value is not None:
 | 
| 92 |  | -                # We have to check if the inner string has variables
 | 
| 93 |  | -                # and return unmatches for those
 | 
| 94 |  | -                unmatched += re.findall(_VARIABLE_MATCH, value)
 | 
| 95 |  | -                matched += [varname]
 | 
| 96 |  | -            else:
 | 
| 97 |  | -                # Return unmodified token
 | 
| 98 |  | -                unmatched += [varname]
 | 
| 99 |  | -                value = token
 | 
| 100 |  | -
 | 
| 101 |  | -            return value
 | 
| 102 |  | -
 | 
| 103 |  | -        matched = []
 | 
| 104 |  | -        unmatched = []
 | 
| 105 |  | -        replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
 | 
| 106 |  | -
 | 
| 107 |  | -        return (replacement, unmatched, matched)
 | 
|  | 88 | +        expstr = _parse_expstr(string)
 | 
|  | 89 | +
 | 
|  | 90 | +        try:
 | 
|  | 91 | +            return _expand_expstr(self._expstr_map, expstr)
 | 
|  | 92 | +        except KeyError:
 | 
|  | 93 | +            unmatched = []
 | 
|  | 94 | +
 | 
|  | 95 | +            # Look for any unmatched variable names in the expansion string
 | 
|  | 96 | +            for var in expstr[1][1::2]:
 | 
|  | 97 | +                if var not in self._expstr_map:
 | 
|  | 98 | +                    unmatched.append(var)
 | 
|  | 99 | +
 | 
|  | 100 | +            if unmatched:
 | 
|  | 101 | +                message = "Unresolved variable{}: {}".format(
 | 
|  | 102 | +                    "s" if len(unmatched) > 1 else "",
 | 
|  | 103 | +                    ", ".join(unmatched)
 | 
|  | 104 | +                )
 | 
|  | 105 | +
 | 
|  | 106 | +                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
 | 
|  | 107 | +            # Otherwise, re-raise the KeyError since it clearly came from some
 | 
|  | 108 | +            # other unknowable cause.
 | 
|  | 109 | +            raise
 | 
| 108 | 110 |  
 | 
| 109 | 111 |      # Variable resolving code
 | 
| 110 | 112 |      #
 | 
| 111 |  | -    # Here we substitute variables for values (resolve variables) repeatedly
 | 
| 112 |  | -    # in a dictionary, each time creating a new dictionary until there is no
 | 
| 113 |  | -    # more unresolved variables to resolve, or, until resolving further no
 | 
| 114 |  | -    # longer resolves anything, in which case we throw an exception.
 | 
|  | 113 | +    # Here we resolve all of our inputs into a dictionary, ready for use
 | 
|  | 114 | +    # in subst()
 | 
| 115 | 115 |      def _resolve(self, node):
 | 
| 116 |  | -        variables = node
 | 
| 117 |  | -
 | 
| 118 | 116 |          # Special case, if notparallel is specified in the variables for this
 | 
| 119 | 117 |          # element, then override max-jobs to be 1.
 | 
| 120 | 118 |          # Initialize it as a string as all variables are processed as strings.
 | 
| 121 | 119 |          #
 | 
| 122 |  | -        if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
 | 
| 123 |  | -            variables['max-jobs'] = str(1)
 | 
| 124 |  | -
 | 
| 125 |  | -        # Resolve the dictionary once, reporting the new dictionary with things
 | 
| 126 |  | -        # substituted in it, and reporting unmatched tokens.
 | 
| 127 |  | -        #
 | 
| 128 |  | -        def resolve_one(variables):
 | 
| 129 |  | -            unmatched = []
 | 
| 130 |  | -            resolved = {}
 | 
| 131 |  | -
 | 
| 132 |  | -            for key, value in _yaml.node_items(variables):
 | 
| 133 |  | -
 | 
| 134 |  | -                # Ensure stringness of the value before substitution
 | 
| 135 |  | -                value = _yaml.node_get(variables, str, key)
 | 
| 136 |  | -
 | 
| 137 |  | -                resolved_var, item_unmatched, matched = self._subst(value, variables)
 | 
| 138 |  | -
 | 
| 139 |  | -                if _wrap_variable(key) in resolved_var:
 | 
| 140 |  | -                    referenced_through = find_recursive_variable(key, matched, variables)
 | 
|  | 120 | +        if _yaml.node_get(node, bool, 'notparallel', default_value=False):
 | 
|  | 121 | +            node['max-jobs'] = str(1)
 | 
|  | 122 | +
 | 
|  | 123 | +        ret = {}
 | 
|  | 124 | +        for key, value in _yaml.node_items(node):
 | 
|  | 125 | +            value = _yaml.node_get(node, str, key)
 | 
|  | 126 | +            ret[sys.intern(key)] = _parse_expstr(value)
 | 
|  | 127 | +        return ret
 | 
|  | 128 | +
 | 
|  | 129 | +    def _check_for_missing(self):
 | 
|  | 130 | +        # First the check for anything unresolvable
 | 
|  | 131 | +        summary = []
 | 
|  | 132 | +        for key, expstr in self._expstr_map.items():
 | 
|  | 133 | +            for var in expstr[1][1::2]:
 | 
|  | 134 | +                if var not in self._expstr_map:
 | 
|  | 135 | +                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
 | 
|  | 136 | +                    provenance = _yaml.node_get_provenance(self.original, key)
 | 
|  | 137 | +                    summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
 | 
|  | 138 | +        if summary:
 | 
|  | 139 | +            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
 | 
|  | 140 | +                            "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
 | 
|  | 141 | +
 | 
|  | 142 | +    def _check_for_cycles(self):
 | 
|  | 143 | +        # And now the cycle checks
 | 
|  | 144 | +        def cycle_check(expstr, visited, cleared):
 | 
|  | 145 | +            for var in expstr[1][1::2]:
 | 
|  | 146 | +                if var in cleared:
 | 
|  | 147 | +                    continue
 | 
|  | 148 | +                if var in visited:
 | 
| 141 | 149 |                      raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
 | 
| 142 |  | -                                    "{}: ".format(_yaml.node_get_provenance(variables, key)) +
 | 
|  | 150 | +                                    "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
 | 
| 143 | 151 |                                      ("Variable '{}' expands to contain a reference to itself. " +
 | 
| 144 |  | -                                     "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
 | 
| 145 |  | -
 | 
| 146 |  | -                resolved[key] = resolved_var
 | 
| 147 |  | -                unmatched += item_unmatched
 | 
| 148 |  | -
 | 
| 149 |  | -            # Carry over provenance
 | 
| 150 |  | -            resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
 | 
| 151 |  | -            return (resolved, unmatched)
 | 
| 152 |  | -
 | 
| 153 |  | -        # Resolve it until it's resolved or broken
 | 
| 154 |  | -        #
 | 
| 155 |  | -        resolved = variables
 | 
| 156 |  | -        unmatched = ['dummy']
 | 
| 157 |  | -        last_unmatched = ['dummy']
 | 
| 158 |  | -        while unmatched:
 | 
| 159 |  | -            resolved, unmatched = resolve_one(resolved)
 | 
| 160 |  | -
 | 
| 161 |  | -            # Lists of strings can be compared like this
 | 
| 162 |  | -            if unmatched == last_unmatched:
 | 
| 163 |  | -                # We've got the same result twice without matching everything,
 | 
| 164 |  | -                # something is undeclared or cyclic, compose a summary.
 | 
| 165 |  | -                #
 | 
| 166 |  | -                summary = ''
 | 
| 167 |  | -                for unmatch in set(unmatched):
 | 
| 168 |  | -                    for var, provenance in self._find_references(unmatch):
 | 
| 169 |  | -                        line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
 | 
| 170 |  | -                        summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
 | 
| 171 |  | -
 | 
| 172 |  | -                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
 | 
| 173 |  | -                                "Failed to resolve one or more variable:\n{}".format(summary))
 | 
| 174 |  | -
 | 
| 175 |  | -            last_unmatched = unmatched
 | 
| 176 |  | -
 | 
| 177 |  | -        return resolved
 | 
| 178 |  | -
 | 
| 179 |  | -    # Helper function to fetch information about the node referring to a variable
 | 
|  | 152 | +                                     "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
 | 
|  | 153 | +                visited.append(var)
 | 
|  | 154 | +                cycle_check(self._expstr_map[var], visited, cleared)
 | 
|  | 155 | +                visited.pop()
 | 
|  | 156 | +                cleared.add(var)
 | 
|  | 157 | +
 | 
|  | 158 | +        cleared = set()
 | 
|  | 159 | +        for key, expstr in self._expstr_map.items():
 | 
|  | 160 | +            if key not in cleared:
 | 
|  | 161 | +                cycle_check(expstr, [key], cleared)
 | 
|  | 162 | +
 | 
|  | 163 | +    # _flatten():
 | 
| 180 | 164 |      #
 | 
| 181 |  | -    def _find_references(self, varname):
 | 
| 182 |  | -        fullname = _wrap_variable(varname)
 | 
| 183 |  | -        for key, value in _yaml.node_items(self.original):
 | 
| 184 |  | -            if fullname in value:
 | 
| 185 |  | -                provenance = _yaml.node_get_provenance(self.original, key)
 | 
| 186 |  | -                yield (key, provenance)
 | 
| 187 |  | -
 | 
| 188 |  | -
 | 
| 189 |  | -def find_recursive_variable(variable, matched_variables, all_vars):
 | 
| 190 |  | -    matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
 | 
| 191 |  | -    for key, value in zip(matched_variables, matched_values):
 | 
| 192 |  | -        if _wrap_variable(variable) in value:
 | 
| 193 |  | -            return key
 | 
| 194 |  | -    # We failed to find a recursive variable
 | 
| 195 |  | -    return None
 | 
| 196 |  | -
 | 
| 197 |  | -
 | 
| 198 |  | -def _wrap_variable(var):
 | 
| 199 |  | -    return "%{" + var + "}" | 
|  | 165 | +    # Turn our dictionary of expansion strings into a flattened dict
 | 
|  | 166 | +    # so that we can run expansions faster in the future
 | 
|  | 167 | +    #
 | 
|  | 168 | +    # Raises:
 | 
|  | 169 | +    #    LoadError, if the string contains unresolved variable references or
 | 
|  | 170 | +    #               if cycles are detected in the variable references
 | 
|  | 171 | +    #
 | 
|  | 172 | +    def _flatten(self):
 | 
|  | 173 | +        flat = {}
 | 
|  | 174 | +        try:
 | 
|  | 175 | +            for key, expstr in self._expstr_map.items():
 | 
|  | 176 | +                if expstr[0] > 1:
 | 
|  | 177 | +                    expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))])
 | 
|  | 178 | +                    self._expstr_map[key] = expstr
 | 
|  | 179 | +                flat[key] = expstr[1][0]
 | 
|  | 180 | +        except KeyError:
 | 
|  | 181 | +            self._check_for_missing()
 | 
|  | 182 | +            raise
 | 
|  | 183 | +        except RecursionError:
 | 
|  | 184 | +            self._check_for_cycles()
 | 
|  | 185 | +            raise
 | 
|  | 186 | +        return flat
 | 
|  | 187 | +
 | 
|  | 188 | +
 | 
|  | 189 | +# Cache for the parsed expansion strings.  While this is nominally
 | 
|  | 190 | +# something which might "waste" memory, in reality each of these
 | 
|  | 191 | +# will live as long as the element which uses it, which is the
 | 
|  | 192 | +# vast majority of the memory usage across the execution of BuildStream.
 | 
|  | 193 | +PARSE_CACHE = {
 | 
|  | 194 | +    # Prime the cache with the empty string since otherwise that can
 | 
|  | 195 | +    # cause issues with the parser, complications to which cause slowdown
 | 
|  | 196 | +    "": (1, [""]),
 | 
|  | 197 | +}
 | 
|  | 198 | +
 | 
|  | 199 | +
 | 
|  | 200 | +# Helper to parse a string into an expansion string tuple, caching
 | 
|  | 201 | +# the results so that future parse requests don't need to think about
 | 
|  | 202 | +# the string
 | 
|  | 203 | +def _parse_expstr(instr):
 | 
|  | 204 | +    try:
 | 
|  | 205 | +        return PARSE_CACHE[instr]
 | 
|  | 206 | +    except KeyError:
 | 
|  | 207 | +        # This use of the regex turns a string like "foo %{bar} baz" into
 | 
|  | 208 | +        # a list ["foo ", "bar", " baz"]
 | 
|  | 209 | +        splits = PARSE_EXPANSION.split(instr)
 | 
|  | 210 | +        # If an expansion ends the string, we get an empty string on the end
 | 
|  | 211 | +        # which we can optimise away, making the expansion routines not need
 | 
|  | 212 | +        # a test for this.
 | 
|  | 213 | +        if splits[-1] == '':
 | 
|  | 214 | +            splits = splits[:-1]
 | 
|  | 215 | +        # Cache an interned copy of this.  We intern it to try and reduce the
 | 
|  | 216 | +        # memory impact of the cache.  It seems odd to cache the list length
 | 
|  | 217 | +        # but this is measurably cheaper than calculating it each time during
 | 
|  | 218 | +        # string expansion.
 | 
|  | 219 | +        PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
 | 
|  | 220 | +        return PARSE_CACHE[instr]
 | 
|  | 221 | +
 | 
|  | 222 | +
 | 
|  | 223 | +# Helper to expand a given top level expansion string tuple in the context
 | 
|  | 224 | +# of the given dictionary of expansion strings.
 | 
|  | 225 | +#
 | 
|  | 226 | +# Note: Will raise KeyError if any expansion is missing
 | 
|  | 227 | +def _expand_expstr(content, topvalue):
 | 
|  | 228 | +    # Short-circuit constant strings
 | 
|  | 229 | +    if topvalue[0] == 1:
 | 
|  | 230 | +        return topvalue[1][0]
 | 
|  | 231 | +
 | 
|  | 232 | +    # Short-circuit strings which are entirely an expansion of another variable
 | 
|  | 233 | +    # e.g. "%{another}"
 | 
|  | 234 | +    if topvalue[0] == 2 and topvalue[1][0] == "":
 | 
|  | 235 | +        return _expand_expstr(content, content[topvalue[1][1]])
 | 
|  | 236 | +
 | 
|  | 237 | +    # Otherwise process fully...
 | 
|  | 238 | +    def internal_expand(value):
 | 
|  | 239 | +        (expansion_len, expansion_bits) = value
 | 
|  | 240 | +        idx = 0
 | 
|  | 241 | +        while idx < expansion_len:
 | 
|  | 242 | +            # First yield any constant string content
 | 
|  | 243 | +            yield expansion_bits[idx]
 | 
|  | 244 | +            idx += 1
 | 
|  | 245 | +            # Now, if there is an expansion variable left to expand, yield
 | 
|  | 246 | +            # the expansion of that variable too
 | 
|  | 247 | +            if idx < expansion_len:
 | 
|  | 248 | +                yield from internal_expand(content[expansion_bits[idx]])
 | 
|  | 249 | +            idx += 1
 | 
|  | 250 | +
 | 
|  | 251 | +    return "".join(internal_expand(topvalue)) |