| ... | ... | @@ -30,7 +30,6 @@ See also: :ref:`sandboxing`. | 
| 30 | 30 |  from collections import OrderedDict
 | 
| 31 | 31 |  
 | 
| 32 | 32 |  import os
 | 
| 33 |  | -import tempfile
 | 
| 34 | 33 |  import stat
 | 
| 35 | 34 |  
 | 
| 36 | 35 |  from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
 | 
| ... | ... | @@ -51,6 +50,183 @@ class IndexEntry(): | 
| 51 | 50 |          self.modified = modified
 | 
| 52 | 51 |  
 | 
| 53 | 52 |  
 | 
|  | 53 | +class ResolutionException(VirtualDirectoryError):
 | 
|  | 54 | +    """ Superclass of all exceptions that can be raised by
 | 
|  | 55 | +    CasBasedDirectory._resolve. Should not be used outside this module. """
 | 
|  | 56 | +    pass
 | 
|  | 57 | +
 | 
|  | 58 | +
 | 
|  | 59 | +class InfiniteSymlinkException(ResolutionException):
 | 
|  | 60 | +    """ Raised when an infinite symlink loop is found. """
 | 
|  | 61 | +    pass
 | 
|  | 62 | +
 | 
|  | 63 | +
 | 
|  | 64 | +class AbsoluteSymlinkException(ResolutionException):
 | 
|  | 65 | +    """Raised if we try to follow an absolute symlink (i.e. one whose
 | 
|  | 66 | +    target starts with the path separator) and we have disallowed
 | 
|  | 67 | +    following such symlinks.
 | 
|  | 68 | +    """
 | 
|  | 69 | +    pass
 | 
|  | 70 | +
 | 
|  | 71 | +
 | 
|  | 72 | +class UnexpectedFileException(ResolutionException):
 | 
|  | 73 | +    """Raised if we found a file where a directory or symlink was
 | 
|  | 74 | +    expected, for example we try to resolve a symlink pointing to
 | 
|  | 75 | +    /a/b/c but /a/b is a file.
 | 
|  | 76 | +    """
 | 
|  | 77 | +    def __init__(self, message=""):
 | 
|  | 78 | +        """Allow constructor with no arguments, since this can be raised in
 | 
|  | 79 | +        places where there isn't sufficient information to write the
 | 
|  | 80 | +        message.
 | 
|  | 81 | +        """
 | 
|  | 82 | +        super().__init__(message)
 | 
|  | 83 | +
 | 
|  | 84 | +
 | 
|  | 85 | +class _Resolver():
 | 
|  | 86 | +    """A class for resolving symlinks inside CAS-based directories. As
 | 
|  | 87 | +    well as providing a namespace for some functions, this also
 | 
|  | 88 | +    contains two flags which are constant throughout one resolution
 | 
|  | 89 | +    operation and the 'seen_objects' list used to detect infinite
 | 
|  | 90 | +    symlink loops.
 | 
|  | 91 | +
 | 
|  | 92 | +    """
 | 
|  | 93 | +
 | 
|  | 94 | +    def __init__(self, absolute_symlinks_resolve=True, force_create=False):
 | 
|  | 95 | +        self.absolute_symlinks_resolve = absolute_symlinks_resolve
 | 
|  | 96 | +        self.force_create = force_create
 | 
|  | 97 | +        self.seen_objects = []
 | 
|  | 98 | +
 | 
|  | 99 | +    def resolve(self, name, directory):
 | 
|  | 100 | +        """Resolves any name to an object. If the name points to a symlink in
 | 
|  | 101 | +        the directory, it returns the thing it points to,
 | 
|  | 102 | +        recursively.
 | 
|  | 103 | +
 | 
|  | 104 | +        Returns a CasBasedDirectory, FileNode or None. None indicates
 | 
|  | 105 | +        either that 'name' does not exist in this directory, or is a
 | 
|  | 106 | +        symlink chain which points to a nonexistent name (broken
 | 
|  | 107 | +        symlink).
 | 
|  | 108 | +
 | 
|  | 109 | +        Raises:
 | 
|  | 110 | +
 | 
|  | 111 | +        - InfiniteSymlinkException if 'name' points to an infinite
 | 
|  | 112 | +          symlink loop.
 | 
|  | 113 | +        - AbsoluteSymlinkException if 'name' points to an absolute
 | 
|  | 114 | +          symlink and absolute_symlinks_resolve is False.
 | 
|  | 115 | +        - UnexpectedFileException if at any point during resolution we
 | 
|  | 116 | +          find a file which we expected to be a directory or symlink.
 | 
|  | 117 | +
 | 
|  | 118 | +        If force_create is set, this will attempt to create
 | 
|  | 119 | +        directories to make symlinks and directories resolve.  Files
 | 
|  | 120 | +        present in symlink target paths will also be removed and
 | 
|  | 121 | +        replaced with directories.  If force_create is off, this will
 | 
|  | 122 | +        never alter 'directory'.
 | 
|  | 123 | +
 | 
|  | 124 | +        """
 | 
|  | 125 | +
 | 
|  | 126 | +        # First check for nonexistent things or 'normal' objects and return them
 | 
|  | 127 | +        if name not in directory.index:
 | 
|  | 128 | +            return None
 | 
|  | 129 | +        index_entry = directory.index[name]
 | 
|  | 130 | +        if isinstance(index_entry.buildstream_object, Directory):
 | 
|  | 131 | +            return index_entry.buildstream_object
 | 
|  | 132 | +        elif isinstance(index_entry.pb_object, remote_execution_pb2.FileNode):
 | 
|  | 133 | +            return index_entry.pb_object
 | 
|  | 134 | +
 | 
|  | 135 | +        # Now we must be dealing with a symlink.
 | 
|  | 136 | +        assert isinstance(index_entry.pb_object, remote_execution_pb2.SymlinkNode)
 | 
|  | 137 | +
 | 
|  | 138 | +        symlink_object = index_entry.pb_object
 | 
|  | 139 | +        if symlink_object in self.seen_objects:
 | 
|  | 140 | +            # Infinite symlink loop detected
 | 
|  | 141 | +            message = ("Infinite symlink loop found during resolution. " +
 | 
|  | 142 | +                       "First repeated element is {}".format(name))
 | 
|  | 143 | +            raise InfiniteSymlinkException(message=message)
 | 
|  | 144 | +
 | 
|  | 145 | +        self.seen_objects.append(symlink_object)
 | 
|  | 146 | +
 | 
|  | 147 | +        components = symlink_object.target.split(CasBasedDirectory._pb2_path_sep)
 | 
|  | 148 | +        absolute = symlink_object.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
 | 
|  | 149 | +
 | 
|  | 150 | +        if absolute:
 | 
|  | 151 | +            if self.absolute_symlinks_resolve:
 | 
|  | 152 | +                directory = directory.find_root()
 | 
|  | 153 | +                # Discard the first empty element
 | 
|  | 154 | +                components.pop(0)
 | 
|  | 155 | +            else:
 | 
|  | 156 | +                # Unresolvable absolute symlink
 | 
|  | 157 | +                message = "{} is an absolute symlink, which was disallowed during resolution".format(name)
 | 
|  | 158 | +                raise AbsoluteSymlinkException(message=message)
 | 
|  | 159 | +
 | 
|  | 160 | +        resolution = directory
 | 
|  | 161 | +        while components and isinstance(resolution, CasBasedDirectory):
 | 
|  | 162 | +            c = components.pop(0)
 | 
|  | 163 | +            directory = resolution
 | 
|  | 164 | +
 | 
|  | 165 | +            try:
 | 
|  | 166 | +                resolution = self._resolve_path_component(c, directory, components)
 | 
|  | 167 | +            except UnexpectedFileException as original:
 | 
|  | 168 | +                errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
 | 
|  | 169 | +                            "cannot proceed. The remaining path components are {}.")
 | 
|  | 170 | +                raise UnexpectedFileException(errormsg.format(c, components)) from original
 | 
|  | 171 | +
 | 
|  | 172 | +        return resolution
 | 
|  | 173 | +
 | 
|  | 174 | +    def _resolve_path_component(self, c, directory, components_remaining):
 | 
|  | 175 | +        if c == ".":
 | 
|  | 176 | +            resolution = directory
 | 
|  | 177 | +        elif c == "..":
 | 
|  | 178 | +            if directory.parent is not None:
 | 
|  | 179 | +                resolution = directory.parent
 | 
|  | 180 | +            else:
 | 
|  | 181 | +                # If directory.parent *is* None, this is an attempt to
 | 
|  | 182 | +                # access '..' from the root, which is valid under
 | 
|  | 183 | +                # POSIX; it just returns the root.
 | 
|  | 184 | +                resolution = directory
 | 
|  | 185 | +        elif c in directory.index:
 | 
|  | 186 | +            try:
 | 
|  | 187 | +                resolution = self._resolve_through_files(c, directory, components_remaining)
 | 
|  | 188 | +            except UnexpectedFileException as original:
 | 
|  | 189 | +                errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
 | 
|  | 190 | +                            "cannot proceed. The remaining path components are {}.")
 | 
|  | 191 | +                raise UnexpectedFileException(errormsg.format(c, components_remaining)) from original
 | 
|  | 192 | +        else:
 | 
|  | 193 | +            # c is not in our index
 | 
|  | 194 | +            if self.force_create:
 | 
|  | 195 | +                resolution = directory.descend(c, create=True)
 | 
|  | 196 | +            else:
 | 
|  | 197 | +                resolution = None
 | 
|  | 198 | +        return resolution
 | 
|  | 199 | +
 | 
|  | 200 | +    def _resolve_through_files(self, c, directory, require_traversable):
 | 
|  | 201 | +        """A wrapper to resolve() which deals with files being found
 | 
|  | 202 | +        in the middle of paths, for example trying to resolve a symlink
 | 
|  | 203 | +        which points to /usr/lib64/libfoo when 'lib64' is a file.
 | 
|  | 204 | +
 | 
|  | 205 | +        require_traversable: If this is True, never return a file
 | 
|  | 206 | +        node.  Instead, if force_create is set, destroy the file node,
 | 
|  | 207 | +        then create and return a normal directory in its place. If
 | 
|  | 208 | +        force_create is off, raises UnexpectedFileException.
 | 
|  | 209 | +
 | 
|  | 210 | +        """
 | 
|  | 211 | +        resolved_thing = self.resolve(c, directory)
 | 
|  | 212 | +
 | 
|  | 213 | +        if isinstance(resolved_thing, remote_execution_pb2.FileNode):
 | 
|  | 214 | +            if require_traversable:
 | 
|  | 215 | +                # We have components still to resolve, but one of the path components
 | 
|  | 216 | +                # is a file.
 | 
|  | 217 | +                if self.force_create:
 | 
|  | 218 | +                    directory.delete_entry(c)
 | 
|  | 219 | +                    resolved_thing = directory.descend(c, create=True)
 | 
|  | 220 | +                else:
 | 
|  | 221 | +                    # This is a signal that we hit a file, but don't
 | 
|  | 222 | +                    # have the data to give a proper message, so the
 | 
|  | 223 | +                    # caller should reraise this with a proper
 | 
|  | 224 | +                    # description.
 | 
|  | 225 | +                    raise UnexpectedFileException()
 | 
|  | 226 | +
 | 
|  | 227 | +        return resolved_thing
 | 
|  | 228 | +
 | 
|  | 229 | +
 | 
| 54 | 230 |  # CasBasedDirectory intentionally doesn't call its superclass constructor,
 | 
| 55 | 231 |  # which is meant to be unimplemented.
 | 
| 56 | 232 |  # pylint: disable=super-init-not-called
 | 
| ... | ... | @@ -168,29 +344,34 @@ class CasBasedDirectory(Directory): | 
| 168 | 344 |          self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
 | 
| 169 | 345 |          return newdir
 | 
| 170 | 346 |  
 | 
| 171 |  | -    def _add_new_file(self, basename, filename):
 | 
|  | 347 | +    def _add_file(self, basename, filename, modified=False):
 | 
| 172 | 348 |          filenode = self.pb2_directory.files.add()
 | 
| 173 | 349 |          filenode.name = filename
 | 
| 174 | 350 |          self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
 | 
| 175 | 351 |          is_executable = os.access(os.path.join(basename, filename), os.X_OK)
 | 
| 176 | 352 |          filenode.is_executable = is_executable
 | 
| 177 |  | -        self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
 | 
|  | 353 | +        self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
 | 
| 178 | 354 |  
 | 
| 179 |  | -    def _add_new_link(self, basename, filename):
 | 
| 180 |  | -        existing_link = self._find_pb2_entry(filename)
 | 
|  | 355 | +    def _copy_link_from_filesystem(self, basename, filename):
 | 
|  | 356 | +        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
 | 
|  | 357 | +
 | 
|  | 358 | +    def _add_new_link_direct(self, name, target):
 | 
|  | 359 | +        existing_link = self._find_pb2_entry(name)
 | 
| 181 | 360 |          if existing_link:
 | 
| 182 | 361 |              symlinknode = existing_link
 | 
| 183 | 362 |          else:
 | 
| 184 | 363 |              symlinknode = self.pb2_directory.symlinks.add()
 | 
| 185 |  | -        symlinknode.name = filename
 | 
|  | 364 | +        assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
 | 
|  | 365 | +        symlinknode.name = name
 | 
| 186 | 366 |          # A symlink node has no digest.
 | 
| 187 |  | -        symlinknode.target = os.readlink(os.path.join(basename, filename))
 | 
| 188 |  | -        self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
 | 
|  | 367 | +        symlinknode.target = target
 | 
|  | 368 | +        self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
 | 
| 189 | 369 |  
 | 
| 190 | 370 |      def delete_entry(self, name):
 | 
| 191 | 371 |          for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
 | 
| 192 |  | -            if name in collection:
 | 
| 193 |  | -                collection.remove(name)
 | 
|  | 372 | +            for thing in collection:
 | 
|  | 373 | +                if thing.name == name:
 | 
|  | 374 | +                    collection.remove(thing)
 | 
| 194 | 375 |          if name in self.index:
 | 
| 195 | 376 |              del self.index[name]
 | 
| 196 | 377 |  
 | 
| ... | ... | @@ -231,9 +412,13 @@ class CasBasedDirectory(Directory): | 
| 231 | 412 |              if isinstance(entry, CasBasedDirectory):
 | 
| 232 | 413 |                  return entry.descend(subdirectory_spec[1:], create)
 | 
| 233 | 414 |              else:
 | 
|  | 415 | +                # May be a symlink
 | 
|  | 416 | +                target = self._resolve(subdirectory_spec[0], force_create=create)
 | 
|  | 417 | +                if isinstance(target, CasBasedDirectory):
 | 
|  | 418 | +                    return target
 | 
| 234 | 419 |                  error = "Cannot descend into {}, which is a '{}' in the directory {}"
 | 
| 235 | 420 |                  raise VirtualDirectoryError(error.format(subdirectory_spec[0],
 | 
| 236 |  | -                                                         type(entry).__name__,
 | 
|  | 421 | +                                                         type(self.index[subdirectory_spec[0]].pb_object).__name__,
 | 
| 237 | 422 |                                                           self))
 | 
| 238 | 423 |          else:
 | 
| 239 | 424 |              if create:
 | 
| ... | ... | @@ -254,36 +439,9 @@ class CasBasedDirectory(Directory): | 
| 254 | 439 |          else:
 | 
| 255 | 440 |              return self
 | 
| 256 | 441 |  
 | 
| 257 |  | -    def _resolve_symlink_or_directory(self, name):
 | 
| 258 |  | -        """Used only by _import_files_from_directory. Tries to resolve a
 | 
| 259 |  | -        directory name or symlink name. 'name' must be an entry in this
 | 
| 260 |  | -        directory. It must be a single symlink or directory name, not a path
 | 
| 261 |  | -        separated by path separators. If it's an existing directory name, it
 | 
| 262 |  | -        just returns the Directory object for that. If it's a symlink, it will
 | 
| 263 |  | -        attempt to find the target of the symlink and return that as a
 | 
| 264 |  | -        Directory object.
 | 
| 265 |  | -
 | 
| 266 |  | -        If a symlink target doesn't exist, it will attempt to create it
 | 
| 267 |  | -        as a directory as long as it's within this directory tree.
 | 
| 268 |  | -        """
 | 
| 269 |  | -
 | 
| 270 |  | -        if isinstance(self.index[name].buildstream_object, Directory):
 | 
| 271 |  | -            return self.index[name].buildstream_object
 | 
| 272 |  | -        # OK then, it's a symlink
 | 
| 273 |  | -        symlink = self._find_pb2_entry(name)
 | 
| 274 |  | -        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
 | 
| 275 |  | -        if absolute:
 | 
| 276 |  | -            root = self.find_root()
 | 
| 277 |  | -        else:
 | 
| 278 |  | -            root = self
 | 
| 279 |  | -        directory = root
 | 
| 280 |  | -        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
 | 
| 281 |  | -        for c in components:
 | 
| 282 |  | -            if c == "..":
 | 
| 283 |  | -                directory = directory.parent
 | 
| 284 |  | -            else:
 | 
| 285 |  | -                directory = directory.descend(c, create=True)
 | 
| 286 |  | -        return directory
 | 
|  | 442 | +    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False):
 | 
|  | 443 | +        resolver = _Resolver(absolute_symlinks_resolve, force_create)
 | 
|  | 444 | +        return resolver.resolve(name, self)
 | 
| 287 | 445 |  
 | 
| 288 | 446 |      def _check_replacement(self, name, path_prefix, fileListResult):
 | 
| 289 | 447 |          """ Checks whether 'name' exists, and if so, whether we can overwrite it.
 | 
| ... | ... | @@ -297,6 +455,7 @@ class CasBasedDirectory(Directory): | 
| 297 | 455 |              return True
 | 
| 298 | 456 |          if (isinstance(existing_entry,
 | 
| 299 | 457 |                         (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
 | 
|  | 458 | +            self.delete_entry(name)
 | 
| 300 | 459 |              fileListResult.overwritten.append(relative_pathname)
 | 
| 301 | 460 |              return True
 | 
| 302 | 461 |          elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
 | 
| ... | ... | @@ -314,23 +473,44 @@ class CasBasedDirectory(Directory): | 
| 314 | 473 |                         .format(name, type(existing_entry)))
 | 
| 315 | 474 |          return False  # In case asserts are disabled
 | 
| 316 | 475 |  
 | 
| 317 |  | -    def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
 | 
| 318 |  | -        """ _import_directory_recursively and _import_files_from_directory will be called alternately
 | 
| 319 |  | -        as a directory tree is descended. """
 | 
| 320 |  | -        if directory_name in self.index:
 | 
| 321 |  | -            subdir = self._resolve_symlink_or_directory(directory_name)
 | 
| 322 |  | -        else:
 | 
| 323 |  | -            subdir = self._add_directory(directory_name)
 | 
| 324 |  | -        new_path_prefix = os.path.join(path_prefix, directory_name)
 | 
| 325 |  | -        subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
 | 
| 326 |  | -                                                            [os.path.sep.join(remaining_path)],
 | 
| 327 |  | -                                                            path_prefix=new_path_prefix)
 | 
| 328 |  | -        return subdir_result
 | 
|  | 476 | +    def _replace_anything_with_dir(self, name, path_prefix, overwritten_files_list):
 | 
|  | 477 | +        self.delete_entry(name)
 | 
|  | 478 | +        subdir = self._add_directory(name)
 | 
|  | 479 | +        overwritten_files_list.append(os.path.join(path_prefix, name))
 | 
|  | 480 | +        return subdir
 | 
| 329 | 481 |  
 | 
| 330 | 482 |      def _import_files_from_directory(self, source_directory, files, path_prefix=""):
 | 
| 331 |  | -        """ Imports files from a traditional directory """
 | 
|  | 483 | +        """ Imports files from a traditional directory. """
 | 
|  | 484 | +
 | 
|  | 485 | +        def _ensure_followable(name, path_prefix):
 | 
|  | 486 | +            """ Makes sure 'name' is a directory or symlink to a directory which can be descended into. """
 | 
|  | 487 | +            if isinstance(self.index[name].buildstream_object, Directory):
 | 
|  | 488 | +                return self.descend(name)
 | 
|  | 489 | +            try:
 | 
|  | 490 | +                target = self._resolve(name, force_create=True)
 | 
|  | 491 | +            except InfiniteSymlinkException:
 | 
|  | 492 | +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
 | 
|  | 493 | +            if isinstance(target, CasBasedDirectory):
 | 
|  | 494 | +                return target
 | 
|  | 495 | +            elif isinstance(target, remote_execution_pb2.FileNode):
 | 
|  | 496 | +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
 | 
|  | 497 | +            return target
 | 
|  | 498 | +
 | 
|  | 499 | +        def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix):
 | 
|  | 500 | +            """ _import_directory_recursively and _import_files_from_directory will be called alternately
 | 
|  | 501 | +            as a directory tree is descended. """
 | 
|  | 502 | +            if directory_name in self.index:
 | 
|  | 503 | +                subdir = _ensure_followable(directory_name, path_prefix)
 | 
|  | 504 | +            else:
 | 
|  | 505 | +                subdir = self._add_directory(directory_name)
 | 
|  | 506 | +            new_path_prefix = os.path.join(path_prefix, directory_name)
 | 
|  | 507 | +            subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
 | 
|  | 508 | +                                                                [os.path.sep.join(remaining_path)],
 | 
|  | 509 | +                                                                path_prefix=new_path_prefix)
 | 
|  | 510 | +            return subdir_result
 | 
|  | 511 | +
 | 
| 332 | 512 |          result = FileListResult()
 | 
| 333 |  | -        for entry in sorted(files):
 | 
|  | 513 | +        for entry in files:
 | 
| 334 | 514 |              split_path = entry.split(os.path.sep)
 | 
| 335 | 515 |              # The actual file on the FS we're importing
 | 
| 336 | 516 |              import_file = os.path.join(source_directory, entry)
 | 
| ... | ... | @@ -338,14 +518,18 @@ class CasBasedDirectory(Directory): | 
| 338 | 518 |              relative_pathname = os.path.join(path_prefix, entry)
 | 
| 339 | 519 |              if len(split_path) > 1:
 | 
| 340 | 520 |                  directory_name = split_path[0]
 | 
| 341 |  | -                # Hand this off to the importer for that subdir. This will only do one file -
 | 
| 342 |  | -                # a better way would be to hand off all the files in this subdir at once.
 | 
| 343 |  | -                subdir_result = self._import_directory_recursively(directory_name, source_directory,
 | 
| 344 |  | -                                                                   split_path[1:], path_prefix)
 | 
|  | 521 | +                # Hand this off to the importer for that subdir.
 | 
|  | 522 | +
 | 
|  | 523 | +                # It would be advantageous to batch these together by
 | 
|  | 524 | +                # directory_name. However, we can't do it out of
 | 
|  | 525 | +                # order, since importing symlinks affects the results
 | 
|  | 526 | +                # of other imports.
 | 
|  | 527 | +                subdir_result = _import_directory_recursively(directory_name, source_directory,
 | 
|  | 528 | +                                                              split_path[1:], path_prefix)
 | 
| 345 | 529 |                  result.combine(subdir_result)
 | 
| 346 | 530 |              elif os.path.islink(import_file):
 | 
| 347 | 531 |                  if self._check_replacement(entry, path_prefix, result):
 | 
| 348 |  | -                    self._add_new_link(source_directory, entry)
 | 
|  | 532 | +                    self._copy_link_from_filesystem(source_directory, entry)
 | 
| 349 | 533 |                      result.files_written.append(relative_pathname)
 | 
| 350 | 534 |              elif os.path.isdir(import_file):
 | 
| 351 | 535 |                  # A plain directory which already exists isn't a problem; just ignore it.
 | 
| ... | ... | @@ -353,10 +537,78 @@ class CasBasedDirectory(Directory): | 
| 353 | 537 |                      self._add_directory(entry)
 | 
| 354 | 538 |              elif os.path.isfile(import_file):
 | 
| 355 | 539 |                  if self._check_replacement(entry, path_prefix, result):
 | 
| 356 |  | -                    self._add_new_file(source_directory, entry)
 | 
|  | 540 | +                    self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten)
 | 
| 357 | 541 |                      result.files_written.append(relative_pathname)
 | 
| 358 | 542 |          return result
 | 
| 359 | 543 |  
 | 
|  | 544 | +    @staticmethod
 | 
|  | 545 | +    def _files_in_subdir(sorted_files, dirname):
 | 
|  | 546 | +        """Filters sorted_files and returns only the ones which have
 | 
|  | 547 | +           'dirname' as a prefix, with that prefix removed.
 | 
|  | 548 | +
 | 
|  | 549 | +        """
 | 
|  | 550 | +        if not dirname.endswith(os.path.sep):
 | 
|  | 551 | +            dirname += os.path.sep
 | 
|  | 552 | +        return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
 | 
|  | 553 | +
 | 
|  | 554 | +    def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
 | 
|  | 555 | +        """ Import only the files and symlinks listed in 'files' from source_directory to this one.
 | 
|  | 556 | +        Args:
 | 
|  | 557 | +           source_directory (:class:`.CasBasedDirectory`): The directory to import from
 | 
|  | 558 | +           files ([str]): List of pathnames to import. Must be a list, not a generator.
 | 
|  | 559 | +           path_prefix (str): Prefix used to add entries to the file list result.
 | 
|  | 560 | +           file_list_required: Whether to update the file list while processing.
 | 
|  | 561 | +        """
 | 
|  | 562 | +        result = FileListResult()
 | 
|  | 563 | +        processed_directories = set()
 | 
|  | 564 | +        for f in files:
 | 
|  | 565 | +            fullname = os.path.join(path_prefix, f)
 | 
|  | 566 | +            components = f.split(os.path.sep)
 | 
|  | 567 | +            if len(components) > 1:
 | 
|  | 568 | +                # We are importing a thing which is in a subdirectory. We may have already seen this dirname
 | 
|  | 569 | +                # for a previous file.
 | 
|  | 570 | +                dirname = components[0]
 | 
|  | 571 | +                if dirname not in processed_directories:
 | 
|  | 572 | +                    # Now strip off the first directory name and import files recursively.
 | 
|  | 573 | +                    subcomponents = CasBasedDirectory._files_in_subdir(files, dirname)
 | 
|  | 574 | +                    # If 'dirname' resolves to a file (directly or via a symlink), replace it with a directory.
 | 
|  | 575 | +                    if dirname in self.index:
 | 
|  | 576 | +                        resolved_component = self._resolve(dirname, force_create=True)
 | 
|  | 577 | +                        if isinstance(resolved_component, remote_execution_pb2.FileNode):
 | 
|  | 578 | +                            dest_subdir = self._replace_anything_with_dir(dirname, path_prefix, result.overwritten)
 | 
|  | 579 | +                        else:
 | 
|  | 580 | +                            dest_subdir = resolved_component
 | 
|  | 581 | +                    else:
 | 
|  | 582 | +                        dest_subdir = self.descend(dirname, create=True)
 | 
|  | 583 | +                    src_subdir = source_directory.descend(dirname)
 | 
|  | 584 | +                    import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
 | 
|  | 585 | +                                                                             path_prefix=fullname,
 | 
|  | 586 | +                                                                             file_list_required=file_list_required)
 | 
|  | 587 | +                    result.combine(import_result)
 | 
|  | 588 | +                processed_directories.add(dirname)
 | 
|  | 589 | +            elif isinstance(source_directory.index[f].buildstream_object, CasBasedDirectory):
 | 
|  | 590 | +                # The thing in the input file list is a directory on
 | 
|  | 591 | +                # its own. We don't need to do anything other than create it if it doesn't exist.
 | 
|  | 592 | +                # If we already have an entry with the same name that isn't a directory, that
 | 
|  | 593 | +                # will be dealt with when importing files in this directory.
 | 
|  | 594 | +                if f not in self.index:
 | 
|  | 595 | +                    self.descend(f, create=True)
 | 
|  | 596 | +            else:
 | 
|  | 597 | +                # We're importing a file or symlink - replace anything with the same name.
 | 
|  | 598 | +                importable = self._check_replacement(f, path_prefix, result)
 | 
|  | 599 | +                if importable:
 | 
|  | 600 | +                    item = source_directory.index[f].pb_object
 | 
|  | 601 | +                    if isinstance(item, remote_execution_pb2.FileNode):
 | 
|  | 602 | +                        filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
 | 
|  | 603 | +                                                                is_executable=item.is_executable)
 | 
|  | 604 | +                        self.index[f] = IndexEntry(filenode, modified=True)
 | 
|  | 605 | +                    else:
 | 
|  | 606 | +                        assert isinstance(item, remote_execution_pb2.SymlinkNode)
 | 
|  | 607 | +                        self._add_new_link_direct(name=f, target=item.target)
 | 
|  | 608 | +                else:
 | 
|  | 609 | +                    result.ignored.append(os.path.join(path_prefix, f))
 | 
|  | 610 | +        return result
 | 
|  | 611 | +
 | 
| 360 | 612 |      def import_files(self, external_pathspec, *, files=None,
 | 
| 361 | 613 |                       report_written=True, update_utimes=False,
 | 
| 362 | 614 |                       can_link=False):
 | 
| ... | ... | @@ -378,28 +630,27 @@ class CasBasedDirectory(Directory): | 
| 378 | 630 |  
 | 
| 379 | 631 |          can_link (bool): Ignored, since hard links do not have any meaning within CAS.
 | 
| 380 | 632 |          """
 | 
| 381 |  | -        if isinstance(external_pathspec, FileBasedDirectory):
 | 
| 382 |  | -            source_directory = external_pathspec._get_underlying_directory()
 | 
| 383 |  | -        elif isinstance(external_pathspec, CasBasedDirectory):
 | 
| 384 |  | -            # TODO: This transfers from one CAS to another via the
 | 
| 385 |  | -            # filesystem, which is very inefficient. Alter this so it
 | 
| 386 |  | -            # transfers refs across directly.
 | 
| 387 |  | -            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
 | 
| 388 |  | -                external_pathspec.export_files(tmpdir)
 | 
| 389 |  | -                if files is None:
 | 
| 390 |  | -                    files = list_relative_paths(tmpdir)
 | 
| 391 |  | -                result = self._import_files_from_directory(tmpdir, files=files)
 | 
| 392 |  | -            return result
 | 
| 393 |  | -        else:
 | 
| 394 |  | -            source_directory = external_pathspec
 | 
| 395 | 633 |  
 | 
| 396 | 634 |          if files is None:
 | 
| 397 |  | -            files = list_relative_paths(source_directory)
 | 
|  | 635 | +            if isinstance(external_pathspec, str):
 | 
|  | 636 | +                files = list_relative_paths(external_pathspec)
 | 
|  | 637 | +            else:
 | 
|  | 638 | +                assert isinstance(external_pathspec, Directory)
 | 
|  | 639 | +                files = external_pathspec.list_relative_paths()
 | 
|  | 640 | +
 | 
|  | 641 | +        if isinstance(external_pathspec, FileBasedDirectory):
 | 
|  | 642 | +            source_directory = external_pathspec.get_underlying_directory()
 | 
|  | 643 | +            result = self._import_files_from_directory(source_directory, files=files)
 | 
|  | 644 | +        elif isinstance(external_pathspec, str):
 | 
|  | 645 | +            source_directory = external_pathspec
 | 
|  | 646 | +            result = self._import_files_from_directory(source_directory, files=files)
 | 
|  | 647 | +        else:
 | 
|  | 648 | +            assert isinstance(external_pathspec, CasBasedDirectory)
 | 
|  | 649 | +            result = self._partial_import_cas_into_cas(external_pathspec, files=list(files))
 | 
| 398 | 650 |  
 | 
| 399 | 651 |          # TODO: No notice is taken of report_written, update_utimes or can_link.
 | 
| 400 | 652 |          # Current behaviour is to fully populate the report, which is inefficient,
 | 
| 401 | 653 |          # but still correct.
 | 
| 402 |  | -        result = self._import_files_from_directory(source_directory, files=files)
 | 
| 403 | 654 |  
 | 
| 404 | 655 |          # We need to recalculate and store the hashes of all directories both
 | 
| 405 | 656 |          # up and down the tree; we have changed our directory by importing files
 | 
| ... | ... | @@ -511,6 +762,28 @@ class CasBasedDirectory(Directory): | 
| 511 | 762 |          else:
 | 
| 512 | 763 |              self._mark_directory_unmodified()
 | 
| 513 | 764 |  
 | 
|  | 765 | +    def _lightweight_resolve_to_index(self, path):
 | 
|  | 766 | +        """A lightweight function for transforming paths into IndexEntry
 | 
|  | 767 | +        objects. This does not follow symlinks.
 | 
|  | 768 | +
 | 
|  | 769 | +        path: The string to resolve. This should be a series of path
 | 
|  | 770 | +        components separated by the protocol buffer path separator
 | 
|  | 771 | +        _pb2_path_sep.
 | 
|  | 772 | +
 | 
|  | 773 | +        Returns: the IndexEntry found, or None if any of the path components were not present.
 | 
|  | 774 | +
 | 
|  | 775 | +        """
 | 
|  | 776 | +        directory = self
 | 
|  | 777 | +        path_components = path.split(CasBasedDirectory._pb2_path_sep)
 | 
|  | 778 | +        for component in path_components[:-1]:
 | 
|  | 779 | +            if component not in directory.index:
 | 
|  | 780 | +                return None
 | 
|  | 781 | +            if isinstance(directory.index[component].buildstream_object, CasBasedDirectory):
 | 
|  | 782 | +                directory = directory.index[component].buildstream_object
 | 
|  | 783 | +            else:
 | 
|  | 784 | +                return None
 | 
|  | 785 | +        return directory.index.get(path_components[-1], None)
 | 
|  | 786 | +
 | 
| 514 | 787 |      def list_modified_paths(self):
 | 
| 515 | 788 |          """Provide a list of relative paths which have been modified since the
 | 
| 516 | 789 |          last call to mark_unmodified.
 | 
| ... | ... | @@ -518,29 +791,43 @@ class CasBasedDirectory(Directory): | 
| 518 | 791 |          Return value: List(str) - list of modified paths
 | 
| 519 | 792 |          """
 | 
| 520 | 793 |  
 | 
| 521 |  | -        filelist = []
 | 
| 522 |  | -        for (k, v) in self.index.items():
 | 
| 523 |  | -            if isinstance(v.buildstream_object, CasBasedDirectory):
 | 
| 524 |  | -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
 | 
| 525 |  | -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
 | 
| 526 |  | -                filelist.append(k)
 | 
| 527 |  | -        return filelist
 | 
|  | 794 | +        for p in self.list_relative_paths():
 | 
|  | 795 | +            i = self._lightweight_resolve_to_index(p)
 | 
|  | 796 | +            if i and i.modified:
 | 
|  | 797 | +                yield p
 | 
| 528 | 798 |  
 | 
| 529 |  | -    def list_relative_paths(self):
 | 
|  | 799 | +    def list_relative_paths(self, relpath=""):
 | 
| 530 | 800 |          """Provide a list of all relative paths.
 | 
| 531 | 801 |  
 | 
| 532 |  | -        NOTE: This list is not in the same order as utils.list_relative_paths.
 | 
| 533 |  | -
 | 
| 534 | 802 |          Return value: List(str) - list of all paths
 | 
| 535 | 803 |          """
 | 
| 536 | 804 |  
 | 
| 537 |  | -        filelist = []
 | 
| 538 |  | -        for (k, v) in self.index.items():
 | 
| 539 |  | -            if isinstance(v.buildstream_object, CasBasedDirectory):
 | 
| 540 |  | -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
 | 
| 541 |  | -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
 | 
| 542 |  | -                filelist.append(k)
 | 
| 543 |  | -        return filelist
 | 
|  | 805 | +        symlink_list = filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.SymlinkNode),
 | 
|  | 806 | +                              self.index.items())
 | 
|  | 807 | +        file_list = list(filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.FileNode),
 | 
|  | 808 | +                                self.index.items()))
 | 
|  | 809 | +        directory_list = filter(lambda i: isinstance(i[1].buildstream_object, CasBasedDirectory),
 | 
|  | 810 | +                                self.index.items())
 | 
|  | 811 | +
 | 
|  | 812 | +        # We need to mimic the behaviour of os.walk, in which symlinks
 | 
|  | 813 | +        # to directories count as directories and symlinks to files or
 | 
|  | 814 | +        # broken symlinks count as files. os.walk doesn't follow
 | 
|  | 815 | +        # symlinks, so we don't recurse.
 | 
|  | 816 | +        for (k, v) in sorted(symlink_list):
 | 
|  | 817 | +            target = self._resolve(k, absolute_symlinks_resolve=True)
 | 
|  | 818 | +            if isinstance(target, CasBasedDirectory):
 | 
|  | 819 | +                yield os.path.join(relpath, k)
 | 
|  | 820 | +            else:
 | 
|  | 821 | +                file_list.append((k, v))
 | 
|  | 822 | +
 | 
|  | 823 | +        if file_list == [] and relpath != "":
 | 
|  | 824 | +            yield relpath
 | 
|  | 825 | +        else:
 | 
|  | 826 | +            for (k, v) in sorted(file_list):
 | 
|  | 827 | +                yield os.path.join(relpath, k)
 | 
|  | 828 | +
 | 
|  | 829 | +        for (k, v) in sorted(directory_list):
 | 
|  | 830 | +            yield from v.buildstream_object.list_relative_paths(relpath=os.path.join(relpath, k))
 | 
| 544 | 831 |  
 | 
| 545 | 832 |      def recalculate_hash(self):
 | 
| 546 | 833 |          """ Recalculates the hash for this directory and stores the results in
 |