| ... | ... | @@ -30,7 +30,6 @@ See also: :ref:`sandboxing`. | 
| 30 | 30 |  from collections import OrderedDict
 | 
| 31 | 31 |  
 | 
| 32 | 32 |  import os
 | 
| 33 |  | -import tempfile
 | 
| 34 | 33 |  import stat
 | 
| 35 | 34 |  
 | 
| 36 | 35 |  from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
 | 
| ... | ... | @@ -51,6 +50,24 @@ class IndexEntry(): | 
| 51 | 50 |          self.modified = modified
 | 
| 52 | 51 |  
 | 
| 53 | 52 |  
 | 
|  | 53 | +class ResolutionException(Exception):
 | 
|  | 54 | +    """ Superclass of all exceptions that can be raised by
 | 
|  | 55 | +    CasBasedDirectory._resolve. Should not be used outside this module. """
 | 
|  | 56 | +    pass
 | 
|  | 57 | +
 | 
|  | 58 | +
 | 
|  | 59 | +class InfiniteSymlinkException(ResolutionException):
 | 
|  | 60 | +    """ Raised when an infinite symlink loop is found. """
 | 
|  | 61 | +    pass
 | 
|  | 62 | +
 | 
|  | 63 | +
 | 
|  | 64 | +class AbsoluteSymlinkException(ResolutionException):
 | 
|  | 65 | +    """Raised if we try to follow an absolute symlink (i.e. one whose
 | 
|  | 66 | +    target starts with the path separator) and we have disallowed
 | 
|  | 67 | +    following such symlinks. """
 | 
|  | 68 | +    pass
 | 
|  | 69 | +
 | 
|  | 70 | +
 | 
| 54 | 71 |  # CasBasedDirectory intentionally doesn't call its superclass constructor,
 | 
| 55 | 72 |  # which is meant to be unimplemented.
 | 
| 56 | 73 |  # pylint: disable=super-init-not-called
 | 
| ... | ... | @@ -168,29 +185,34 @@ class CasBasedDirectory(Directory): | 
| 168 | 185 |          self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
 | 
| 169 | 186 |          return newdir
 | 
| 170 | 187 |  
 | 
| 171 |  | -    def _add_new_file(self, basename, filename):
 | 
|  | 188 | +    def _add_file(self, basename, filename, modified=False):
 | 
| 172 | 189 |          filenode = self.pb2_directory.files.add()
 | 
| 173 | 190 |          filenode.name = filename
 | 
| 174 | 191 |          self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
 | 
| 175 | 192 |          is_executable = os.access(os.path.join(basename, filename), os.X_OK)
 | 
| 176 | 193 |          filenode.is_executable = is_executable
 | 
| 177 |  | -        self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
 | 
|  | 194 | +        self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
 | 
|  | 195 | +
 | 
|  | 196 | +    def _copy_link_from_filesystem(self, basename, filename):
 | 
|  | 197 | +        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
 | 
| 178 | 198 |  
 | 
| 179 |  | -    def _add_new_link(self, basename, filename):
 | 
| 180 |  | -        existing_link = self._find_pb2_entry(filename)
 | 
|  | 199 | +    def _add_new_link_direct(self, name, target):
 | 
|  | 200 | +        existing_link = self._find_pb2_entry(name)
 | 
| 181 | 201 |          if existing_link:
 | 
| 182 | 202 |              symlinknode = existing_link
 | 
| 183 | 203 |          else:
 | 
| 184 | 204 |              symlinknode = self.pb2_directory.symlinks.add()
 | 
| 185 |  | -        symlinknode.name = filename
 | 
|  | 205 | +        assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
 | 
|  | 206 | +        symlinknode.name = name
 | 
| 186 | 207 |          # A symlink node has no digest.
 | 
| 187 |  | -        symlinknode.target = os.readlink(os.path.join(basename, filename))
 | 
| 188 |  | -        self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
 | 
|  | 208 | +        symlinknode.target = target
 | 
|  | 209 | +        self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
 | 
| 189 | 210 |  
 | 
| 190 | 211 |      def delete_entry(self, name):
 | 
| 191 | 212 |          for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
 | 
| 192 |  | -            if name in collection:
 | 
| 193 |  | -                collection.remove(name)
 | 
|  | 213 | +            for thing in collection:
 | 
|  | 214 | +                if thing.name == name:
 | 
|  | 215 | +                    collection.remove(thing)
 | 
| 194 | 216 |          if name in self.index:
 | 
| 195 | 217 |              del self.index[name]
 | 
| 196 | 218 |  
 | 
| ... | ... | @@ -231,9 +253,13 @@ class CasBasedDirectory(Directory): | 
| 231 | 253 |              if isinstance(entry, CasBasedDirectory):
 | 
| 232 | 254 |                  return entry.descend(subdirectory_spec[1:], create)
 | 
| 233 | 255 |              else:
 | 
|  | 256 | +                # May be a symlink
 | 
|  | 257 | +                target = self._resolve(subdirectory_spec[0], force_create=create)
 | 
|  | 258 | +                if isinstance(target, CasBasedDirectory):
 | 
|  | 259 | +                    return target
 | 
| 234 | 260 |                  error = "Cannot descend into {}, which is a '{}' in the directory {}"
 | 
| 235 | 261 |                  raise VirtualDirectoryError(error.format(subdirectory_spec[0],
 | 
| 236 |  | -                                                         type(entry).__name__,
 | 
|  | 262 | +                                                         type(self.index[subdirectory_spec[0]].pb_object).__name__,
 | 
| 237 | 263 |                                                           self))
 | 
| 238 | 264 |          else:
 | 
| 239 | 265 |              if create:
 | 
| ... | ... | @@ -254,36 +280,112 @@ class CasBasedDirectory(Directory): | 
| 254 | 280 |          else:
 | 
| 255 | 281 |              return self
 | 
| 256 | 282 |  
 | 
| 257 |  | -    def _resolve_symlink_or_directory(self, name):
 | 
| 258 |  | -        """Used only by _import_files_from_directory. Tries to resolve a
 | 
| 259 |  | -        directory name or symlink name. 'name' must be an entry in this
 | 
| 260 |  | -        directory. It must be a single symlink or directory name, not a path
 | 
| 261 |  | -        separated by path separators. If it's an existing directory name, it
 | 
| 262 |  | -        just returns the Directory object for that. If it's a symlink, it will
 | 
| 263 |  | -        attempt to find the target of the symlink and return that as a
 | 
| 264 |  | -        Directory object.
 | 
| 265 |  | -
 | 
| 266 |  | -        If a symlink target doesn't exist, it will attempt to create it
 | 
| 267 |  | -        as a directory as long as it's within this directory tree.
 | 
|  | 283 | +    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False, seen_objects=None):
 | 
|  | 284 | +        """Resolves any name to an object. If the name points to a symlink in
 | 
|  | 285 | +        this directory, it returns the thing it points to,
 | 
|  | 286 | +        recursively.
 | 
|  | 287 | +
 | 
|  | 288 | +        Returns a CasBasedDirectory, FileNode or None. None indicates
 | 
|  | 289 | +        either that 'name' does not exist in this directory, or is a
 | 
|  | 290 | +        symlink chain which points to a nonexistent name (broken
 | 
|  | 291 | +        symlink).
 | 
|  | 292 | +
 | 
|  | 293 | +
 | 
|  | 294 | +        Raises:
 | 
|  | 295 | +        - InfiniteSymlinkException if 'name' points to an infinite symlink loop.
 | 
|  | 296 | +        - AbsoluteSymlinkException if 'name' points to an absolute symlink and absolute_symlinks_resolve is False.
 | 
|  | 297 | +
 | 
|  | 298 | +        If force_create is on, this will attempt to create directories to make symlinks and directories resolve.
 | 
|  | 299 | +        If force_create is off, this will never alter this directory.
 | 
|  | 300 | +
 | 
| 268 | 301 |          """
 | 
| 269 | 302 |  
 | 
| 270 |  | -        if isinstance(self.index[name].buildstream_object, Directory):
 | 
| 271 |  | -            return self.index[name].buildstream_object
 | 
| 272 |  | -        # OK then, it's a symlink
 | 
| 273 |  | -        symlink = self._find_pb2_entry(name)
 | 
|  | 303 | +        if name not in self.index:
 | 
|  | 304 | +            return None
 | 
|  | 305 | +
 | 
|  | 306 | +        # First check if it's a normal object and return that
 | 
|  | 307 | +        index_entry = self.index[name]
 | 
|  | 308 | +        if isinstance(index_entry.buildstream_object, Directory):
 | 
|  | 309 | +            return index_entry.buildstream_object
 | 
|  | 310 | +        elif isinstance(index_entry.pb_object, remote_execution_pb2.FileNode):
 | 
|  | 311 | +            return index_entry.pb_object
 | 
|  | 312 | +
 | 
|  | 313 | +        assert isinstance(index_entry.pb_object, remote_execution_pb2.SymlinkNode)
 | 
|  | 314 | +
 | 
|  | 315 | +        if seen_objects is None:
 | 
|  | 316 | +            seen_objects = [index_entry.pb_object]
 | 
|  | 317 | +        else:
 | 
|  | 318 | +            if index_entry.pb_object in seen_objects:
 | 
|  | 319 | +                # Infinite symlink loop detected
 | 
|  | 320 | +                raise InfiniteSymlinkException()
 | 
|  | 321 | +
 | 
|  | 322 | +        symlink = index_entry.pb_object
 | 
|  | 323 | +        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
 | 
|  | 324 | +
 | 
| 274 | 325 |          absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
 | 
| 275 | 326 |          if absolute:
 | 
| 276 |  | -            root = self.find_root()
 | 
|  | 327 | +            if absolute_symlinks_resolve:
 | 
|  | 328 | +                start_directory = self.find_root()
 | 
|  | 329 | +                # Discard the first empty element
 | 
|  | 330 | +                components.pop(0)
 | 
|  | 331 | +            else:
 | 
|  | 332 | +                # Unresolvable absolute symlink
 | 
|  | 333 | +                raise AbsoluteSymlinkException()
 | 
| 277 | 334 |          else:
 | 
| 278 |  | -            root = self
 | 
| 279 |  | -        directory = root
 | 
| 280 |  | -        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
 | 
| 281 |  | -        for c in components:
 | 
| 282 |  | -            if c == "..":
 | 
| 283 |  | -                directory = directory.parent
 | 
|  | 335 | +            start_directory = self
 | 
|  | 336 | +
 | 
|  | 337 | +        directory = start_directory
 | 
|  | 338 | +        while True:
 | 
|  | 339 | +            if not components:
 | 
|  | 340 | +                # If we run out of components, the directory we're currently in
 | 
|  | 341 | +                # is the resolved component.
 | 
|  | 342 | +                return directory
 | 
|  | 343 | +
 | 
|  | 344 | +            c = components.pop(0)
 | 
|  | 345 | +            if c == ".":
 | 
|  | 346 | +                pass
 | 
|  | 347 | +            elif c == "..":
 | 
|  | 348 | +                if directory.parent is not None:
 | 
|  | 349 | +                    directory = directory.parent
 | 
|  | 350 | +                # If directory.parent *is* None, this is an attempt to access
 | 
|  | 351 | +                # '..' from the root, which is valid under POSIX; it just
 | 
|  | 352 | +                # returns the root.
 | 
|  | 353 | +            elif c in directory.index:
 | 
|  | 354 | +                # Recursive resolve and continue
 | 
|  | 355 | +                try:
 | 
|  | 356 | +                    f = directory._resolve(c, absolute_symlinks_resolve, seen_objects=seen_objects,
 | 
|  | 357 | +                                           force_create=force_create)
 | 
|  | 358 | +                except ResolutionException:
 | 
|  | 359 | +                    return None
 | 
|  | 360 | +                if isinstance(f, CasBasedDirectory):
 | 
|  | 361 | +                    directory = f
 | 
|  | 362 | +                elif isinstance(f, remote_execution_pb2.FileNode):
 | 
|  | 363 | +                    if components:
 | 
|  | 364 | +                        # We have components still to resolve, but one of the path components
 | 
|  | 365 | +                        # is a file.
 | 
|  | 366 | +                        if force_create:
 | 
|  | 367 | +                            self.delete_entry(c)
 | 
|  | 368 | +                            directory = directory.descend(c, create=True)
 | 
|  | 369 | +                        else:
 | 
|  | 370 | +                            errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
 | 
|  | 371 | +                                        "cannot proceed. The remaining path components are {}.")
 | 
|  | 372 | +                            raise ResolutionException(errormsg.format(c, components))
 | 
|  | 373 | +                    else:
 | 
|  | 374 | +                        # It's a file, and there's no path components left, so just return that.
 | 
|  | 375 | +                        return f
 | 
|  | 376 | +                else:
 | 
|  | 377 | +                    # f was not found, or wasn't resolvable
 | 
|  | 378 | +                    if force_create:
 | 
|  | 379 | +                        directory = directory.descend(c, create=True)
 | 
|  | 380 | +                    else:
 | 
|  | 381 | +                        return None
 | 
| 284 | 382 |              else:
 | 
| 285 |  | -                directory = directory.descend(c, create=True)
 | 
| 286 |  | -        return directory
 | 
|  | 383 | +                # c is not in our index
 | 
|  | 384 | +                if force_create:
 | 
|  | 385 | +                    directory = directory.descend(c, create=True)
 | 
|  | 386 | +                else:
 | 
|  | 387 | +                    return None
 | 
|  | 388 | +        # You can only exit the while loop with a return, or exception, so you shouldn't be here.
 | 
| 287 | 389 |  
 | 
| 288 | 390 |      def _check_replacement(self, name, path_prefix, fileListResult):
 | 
| 289 | 391 |          """ Checks whether 'name' exists, and if so, whether we can overwrite it.
 | 
| ... | ... | @@ -297,6 +399,7 @@ class CasBasedDirectory(Directory): | 
| 297 | 399 |              return True
 | 
| 298 | 400 |          if (isinstance(existing_entry,
 | 
| 299 | 401 |                         (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
 | 
|  | 402 | +            self.delete_entry(name)
 | 
| 300 | 403 |              fileListResult.overwritten.append(relative_pathname)
 | 
| 301 | 404 |              return True
 | 
| 302 | 405 |          elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
 | 
| ... | ... | @@ -314,23 +417,44 @@ class CasBasedDirectory(Directory): | 
| 314 | 417 |                         .format(name, type(existing_entry)))
 | 
| 315 | 418 |          return False  # In case asserts are disabled
 | 
| 316 | 419 |  
 | 
| 317 |  | -    def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
 | 
| 318 |  | -        """ _import_directory_recursively and _import_files_from_directory will be called alternately
 | 
| 319 |  | -        as a directory tree is descended. """
 | 
| 320 |  | -        if directory_name in self.index:
 | 
| 321 |  | -            subdir = self._resolve_symlink_or_directory(directory_name)
 | 
| 322 |  | -        else:
 | 
| 323 |  | -            subdir = self._add_directory(directory_name)
 | 
| 324 |  | -        new_path_prefix = os.path.join(path_prefix, directory_name)
 | 
| 325 |  | -        subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
 | 
| 326 |  | -                                                            [os.path.sep.join(remaining_path)],
 | 
| 327 |  | -                                                            path_prefix=new_path_prefix)
 | 
| 328 |  | -        return subdir_result
 | 
|  | 420 | +    def _replace_anything_with_dir(self, name, path_prefix, overwritten_files_list):
 | 
|  | 421 | +        self.delete_entry(name)
 | 
|  | 422 | +        subdir = self._add_directory(name)
 | 
|  | 423 | +        overwritten_files_list.append(os.path.join(path_prefix, name))
 | 
|  | 424 | +        return subdir
 | 
| 329 | 425 |  
 | 
| 330 | 426 |      def _import_files_from_directory(self, source_directory, files, path_prefix=""):
 | 
| 331 |  | -        """ Imports files from a traditional directory """
 | 
|  | 427 | +        """ Imports files from a traditional directory. """
 | 
|  | 428 | +
 | 
|  | 429 | +        def _ensure_followable(name, path_prefix):
 | 
|  | 430 | +            """ Makes sure 'name' is a directory or symlink to a directory which can be descended into. """
 | 
|  | 431 | +            if isinstance(self.index[name].buildstream_object, Directory):
 | 
|  | 432 | +                return self.descend(name)
 | 
|  | 433 | +            try:
 | 
|  | 434 | +                target = self._resolve(name, force_create=True)
 | 
|  | 435 | +            except InfiniteSymlinkException:
 | 
|  | 436 | +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
 | 
|  | 437 | +            if isinstance(target, CasBasedDirectory):
 | 
|  | 438 | +                return target
 | 
|  | 439 | +            elif isinstance(target, remote_execution_pb2.FileNode):
 | 
|  | 440 | +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
 | 
|  | 441 | +            return target
 | 
|  | 442 | +
 | 
|  | 443 | +        def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix):
 | 
|  | 444 | +            """ _import_directory_recursively and _import_files_from_directory will be called alternately
 | 
|  | 445 | +            as a directory tree is descended. """
 | 
|  | 446 | +            if directory_name in self.index:
 | 
|  | 447 | +                subdir = _ensure_followable(directory_name, path_prefix)
 | 
|  | 448 | +            else:
 | 
|  | 449 | +                subdir = self._add_directory(directory_name)
 | 
|  | 450 | +            new_path_prefix = os.path.join(path_prefix, directory_name)
 | 
|  | 451 | +            subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
 | 
|  | 452 | +                                                                [os.path.sep.join(remaining_path)],
 | 
|  | 453 | +                                                                path_prefix=new_path_prefix)
 | 
|  | 454 | +            return subdir_result
 | 
|  | 455 | +
 | 
| 332 | 456 |          result = FileListResult()
 | 
| 333 |  | -        for entry in sorted(files):
 | 
|  | 457 | +        for entry in files:
 | 
| 334 | 458 |              split_path = entry.split(os.path.sep)
 | 
| 335 | 459 |              # The actual file on the FS we're importing
 | 
| 336 | 460 |              import_file = os.path.join(source_directory, entry)
 | 
| ... | ... | @@ -338,14 +462,18 @@ class CasBasedDirectory(Directory): | 
| 338 | 462 |              relative_pathname = os.path.join(path_prefix, entry)
 | 
| 339 | 463 |              if len(split_path) > 1:
 | 
| 340 | 464 |                  directory_name = split_path[0]
 | 
| 341 |  | -                # Hand this off to the importer for that subdir. This will only do one file -
 | 
| 342 |  | -                # a better way would be to hand off all the files in this subdir at once.
 | 
| 343 |  | -                subdir_result = self._import_directory_recursively(directory_name, source_directory,
 | 
| 344 |  | -                                                                   split_path[1:], path_prefix)
 | 
|  | 465 | +                # Hand this off to the importer for that subdir.
 | 
|  | 466 | +
 | 
|  | 467 | +                # It would be advantageous to batch these together by
 | 
|  | 468 | +                # directory_name. However, we can't do it out of
 | 
|  | 469 | +                # order, since importing symlinks affects the results
 | 
|  | 470 | +                # of other imports.
 | 
|  | 471 | +                subdir_result = _import_directory_recursively(directory_name, source_directory,
 | 
|  | 472 | +                                                              split_path[1:], path_prefix)
 | 
| 345 | 473 |                  result.combine(subdir_result)
 | 
| 346 | 474 |              elif os.path.islink(import_file):
 | 
| 347 | 475 |                  if self._check_replacement(entry, path_prefix, result):
 | 
| 348 |  | -                    self._add_new_link(source_directory, entry)
 | 
|  | 476 | +                    self._copy_link_from_filesystem(source_directory, entry)
 | 
| 349 | 477 |                      result.files_written.append(relative_pathname)
 | 
| 350 | 478 |              elif os.path.isdir(import_file):
 | 
| 351 | 479 |                  # A plain directory which already exists isn't a problem; just ignore it.
 | 
| ... | ... | @@ -353,10 +481,86 @@ class CasBasedDirectory(Directory): | 
| 353 | 481 |                      self._add_directory(entry)
 | 
| 354 | 482 |              elif os.path.isfile(import_file):
 | 
| 355 | 483 |                  if self._check_replacement(entry, path_prefix, result):
 | 
| 356 |  | -                    self._add_new_file(source_directory, entry)
 | 
|  | 484 | +                    self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten)
 | 
| 357 | 485 |                      result.files_written.append(relative_pathname)
 | 
| 358 | 486 |          return result
 | 
| 359 | 487 |  
 | 
|  | 488 | +    @staticmethod
 | 
|  | 489 | +    def _files_in_subdir(sorted_files, dirname):
 | 
|  | 490 | +        """Filters sorted_files and returns only the ones which have
 | 
|  | 491 | +           'dirname' as a prefix, with that prefix removed.
 | 
|  | 492 | +
 | 
|  | 493 | +        """
 | 
|  | 494 | +        if not dirname.endswith(os.path.sep):
 | 
|  | 495 | +            dirname += os.path.sep
 | 
|  | 496 | +        return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
 | 
|  | 497 | +
 | 
|  | 498 | +    def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
 | 
|  | 499 | +        """ Import only the files and symlinks listed in 'files' from source_directory to this one.
 | 
|  | 500 | +        Args:
 | 
|  | 501 | +           source_directory (:class:`.CasBasedDirectory`): The directory to import from
 | 
|  | 502 | +           files ([str]): List of pathnames to import.
 | 
|  | 503 | +           path_prefix (str): Prefix used to add entries to the file list result.
 | 
|  | 504 | +           file_list_required: Whether to update the file list while processing.
 | 
|  | 505 | +        """
 | 
|  | 506 | +        result = FileListResult()
 | 
|  | 507 | +        processed_directories = set()
 | 
|  | 508 | +        for f in files:
 | 
|  | 509 | +            fullname = os.path.join(path_prefix, f)
 | 
|  | 510 | +            components = f.split(os.path.sep)
 | 
|  | 511 | +            if len(components) > 1:
 | 
|  | 512 | +                # We are importing a thing which is in a subdirectory. We may have already seen this dirname
 | 
|  | 513 | +                # for a previous file.
 | 
|  | 514 | +                dirname = components[0]
 | 
|  | 515 | +                if dirname not in processed_directories:
 | 
|  | 516 | +                    # Now strip off the first directory name and import files recursively.
 | 
|  | 517 | +                    subcomponents = CasBasedDirectory._files_in_subdir(files, dirname)
 | 
|  | 518 | +                    # We will fail at this point if there is a file or symlink to file called 'dirname'.
 | 
|  | 519 | +                    if dirname in self.index:
 | 
|  | 520 | +                        resolved_component = self._resolve(dirname, force_create=True)
 | 
|  | 521 | +                        if isinstance(resolved_component, remote_execution_pb2.FileNode):
 | 
|  | 522 | +                            dest_subdir = self._replace_anything_with_dir(dirname, path_prefix, result.overwritten)
 | 
|  | 523 | +                        else:
 | 
|  | 524 | +                            dest_subdir = resolved_component
 | 
|  | 525 | +                    else:
 | 
|  | 526 | +                        dest_subdir = self.descend(dirname, create=True)
 | 
|  | 527 | +                    src_subdir = source_directory.descend(dirname)
 | 
|  | 528 | +                    import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
 | 
|  | 529 | +                                                                             path_prefix=fullname,
 | 
|  | 530 | +                                                                             file_list_required=file_list_required)
 | 
|  | 531 | +                    result.combine(import_result)
 | 
|  | 532 | +                processed_directories.add(dirname)
 | 
|  | 533 | +            elif isinstance(source_directory.index[f].buildstream_object, CasBasedDirectory):
 | 
|  | 534 | +                # The thing in the input file list is a directory on
 | 
|  | 535 | +                # its own. We don't need to do anything other than create it if it doesn't exist.
 | 
|  | 536 | +                # If we already have an entry with the same name that isn't a directory, that
 | 
|  | 537 | +                # will be dealt with when importing files in this directory.
 | 
|  | 538 | +                if f not in self.index:
 | 
|  | 539 | +                    self.descend(f, create=True)
 | 
|  | 540 | +            else:
 | 
|  | 541 | +                # We're importing a file or symlink - replace anything with the same name.
 | 
|  | 542 | +                importable = self._check_replacement(f, path_prefix, result)
 | 
|  | 543 | +                if importable:
 | 
|  | 544 | +                    item = source_directory.index[f].pb_object
 | 
|  | 545 | +                    if isinstance(item, remote_execution_pb2.FileNode):
 | 
|  | 546 | +                        filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
 | 
|  | 547 | +                                                                is_executable=item.is_executable)
 | 
|  | 548 | +                        self.index[f] = IndexEntry(filenode, modified=True)
 | 
|  | 549 | +                    else:
 | 
|  | 550 | +                        assert isinstance(item, remote_execution_pb2.SymlinkNode)
 | 
|  | 551 | +                        self._add_new_link_direct(name=f, target=item.target)
 | 
|  | 552 | +                else:
 | 
|  | 553 | +                    result.ignored.append(os.path.join(path_prefix, f))
 | 
|  | 554 | +        return result
 | 
|  | 555 | +
 | 
|  | 556 | +    def _import_cas_into_cas(self, source_directory, files=None):
 | 
|  | 557 | +        """ A full import is significantly quicker than a partial import, because we can just
 | 
|  | 558 | +        replace one directory with another's hash, without doing any recursion.
 | 
|  | 559 | +        """
 | 
|  | 560 | +
 | 
|  | 561 | +        # You must pass a list into _partial_import (not a generator)
 | 
|  | 562 | +        return self._partial_import_cas_into_cas(source_directory, list(files))
 | 
|  | 563 | +
 | 
| 360 | 564 |      def import_files(self, external_pathspec, *, files=None,
 | 
| 361 | 565 |                       report_written=True, update_utimes=False,
 | 
| 362 | 566 |                       can_link=False):
 | 
| ... | ... | @@ -378,28 +582,27 @@ class CasBasedDirectory(Directory): | 
| 378 | 582 |  
 | 
| 379 | 583 |          can_link (bool): Ignored, since hard links do not have any meaning within CAS.
 | 
| 380 | 584 |          """
 | 
| 381 |  | -        if isinstance(external_pathspec, FileBasedDirectory):
 | 
| 382 |  | -            source_directory = external_pathspec._get_underlying_directory()
 | 
| 383 |  | -        elif isinstance(external_pathspec, CasBasedDirectory):
 | 
| 384 |  | -            # TODO: This transfers from one CAS to another via the
 | 
| 385 |  | -            # filesystem, which is very inefficient. Alter this so it
 | 
| 386 |  | -            # transfers refs across directly.
 | 
| 387 |  | -            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
 | 
| 388 |  | -                external_pathspec.export_files(tmpdir)
 | 
| 389 |  | -                if files is None:
 | 
| 390 |  | -                    files = list_relative_paths(tmpdir)
 | 
| 391 |  | -                result = self._import_files_from_directory(tmpdir, files=files)
 | 
| 392 |  | -            return result
 | 
| 393 |  | -        else:
 | 
| 394 |  | -            source_directory = external_pathspec
 | 
| 395 | 585 |  
 | 
| 396 | 586 |          if files is None:
 | 
| 397 |  | -            files = list_relative_paths(source_directory)
 | 
|  | 587 | +            if isinstance(external_pathspec, str):
 | 
|  | 588 | +                files = list_relative_paths(external_pathspec)
 | 
|  | 589 | +            else:
 | 
|  | 590 | +                assert isinstance(external_pathspec, Directory)
 | 
|  | 591 | +                files = external_pathspec.list_relative_paths()
 | 
|  | 592 | +
 | 
|  | 593 | +        if isinstance(external_pathspec, FileBasedDirectory):
 | 
|  | 594 | +            source_directory = external_pathspec.get_underlying_directory()
 | 
|  | 595 | +            result = self._import_files_from_directory(source_directory, files=files)
 | 
|  | 596 | +        elif isinstance(external_pathspec, str):
 | 
|  | 597 | +            source_directory = external_pathspec
 | 
|  | 598 | +            result = self._import_files_from_directory(source_directory, files=files)
 | 
|  | 599 | +        else:
 | 
|  | 600 | +            assert isinstance(external_pathspec, CasBasedDirectory)
 | 
|  | 601 | +            result = self._import_cas_into_cas(external_pathspec, files=files)
 | 
| 398 | 602 |  
 | 
| 399 | 603 |          # TODO: No notice is taken of report_written, update_utimes or can_link.
 | 
| 400 | 604 |          # Current behaviour is to fully populate the report, which is inefficient,
 | 
| 401 | 605 |          # but still correct.
 | 
| 402 |  | -        result = self._import_files_from_directory(source_directory, files=files)
 | 
| 403 | 606 |  
 | 
| 404 | 607 |          # We need to recalculate and store the hashes of all directories both
 | 
| 405 | 608 |          # up and down the tree; we have changed our directory by importing files
 | 
| ... | ... | @@ -511,6 +714,28 @@ class CasBasedDirectory(Directory): | 
| 511 | 714 |          else:
 | 
| 512 | 715 |              self._mark_directory_unmodified()
 | 
| 513 | 716 |  
 | 
|  | 717 | +    def _lightweight_resolve_to_index(self, path):
 | 
|  | 718 | +        """A lightweight function for transforming paths into IndexEntry
 | 
|  | 719 | +        objects. This does not follow symlinks.
 | 
|  | 720 | +
 | 
|  | 721 | +        path: The string to resolve. This should be a series of path
 | 
|  | 722 | +        components separated by the protocol buffer path separator
 | 
|  | 723 | +        _pb2_path_sep.
 | 
|  | 724 | +
 | 
|  | 725 | +        Returns: the IndexEntry found, or None if any of the path components were not present.
 | 
|  | 726 | +
 | 
|  | 727 | +        """
 | 
|  | 728 | +        directory = self
 | 
|  | 729 | +        path_components = path.split(CasBasedDirectory._pb2_path_sep)
 | 
|  | 730 | +        for component in path_components[:-1]:
 | 
|  | 731 | +            if component not in directory.index:
 | 
|  | 732 | +                return None
 | 
|  | 733 | +            if isinstance(directory.index[component].buildstream_object, CasBasedDirectory):
 | 
|  | 734 | +                directory = directory.index[component].buildstream_object
 | 
|  | 735 | +            else:
 | 
|  | 736 | +                return None
 | 
|  | 737 | +        return directory.index.get(path_components[-1], None)
 | 
|  | 738 | +
 | 
| 514 | 739 |      def list_modified_paths(self):
 | 
| 515 | 740 |          """Provide a list of relative paths which have been modified since the
 | 
| 516 | 741 |          last call to mark_unmodified.
 | 
| ... | ... | @@ -518,29 +743,43 @@ class CasBasedDirectory(Directory): | 
| 518 | 743 |          Return value: List(str) - list of modified paths
 | 
| 519 | 744 |          """
 | 
| 520 | 745 |  
 | 
| 521 |  | -        filelist = []
 | 
| 522 |  | -        for (k, v) in self.index.items():
 | 
| 523 |  | -            if isinstance(v.buildstream_object, CasBasedDirectory):
 | 
| 524 |  | -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
 | 
| 525 |  | -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
 | 
| 526 |  | -                filelist.append(k)
 | 
| 527 |  | -        return filelist
 | 
|  | 746 | +        for p in self.list_relative_paths():
 | 
|  | 747 | +            i = self._lightweight_resolve_to_index(p)
 | 
|  | 748 | +            if i and i.modified:
 | 
|  | 749 | +                yield p
 | 
| 528 | 750 |  
 | 
| 529 |  | -    def list_relative_paths(self):
 | 
|  | 751 | +    def list_relative_paths(self, relpath=""):
 | 
| 530 | 752 |          """Provide a list of all relative paths.
 | 
| 531 | 753 |  
 | 
| 532 |  | -        NOTE: This list is not in the same order as utils.list_relative_paths.
 | 
| 533 |  | -
 | 
| 534 | 754 |          Return value: List(str) - list of all paths
 | 
| 535 | 755 |          """
 | 
| 536 | 756 |  
 | 
| 537 |  | -        filelist = []
 | 
| 538 |  | -        for (k, v) in self.index.items():
 | 
| 539 |  | -            if isinstance(v.buildstream_object, CasBasedDirectory):
 | 
| 540 |  | -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
 | 
| 541 |  | -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
 | 
| 542 |  | -                filelist.append(k)
 | 
| 543 |  | -        return filelist
 | 
|  | 757 | +        symlink_list = filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.SymlinkNode),
 | 
|  | 758 | +                              self.index.items())
 | 
|  | 759 | +        file_list = list(filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.FileNode),
 | 
|  | 760 | +                                self.index.items()))
 | 
|  | 761 | +        directory_list = filter(lambda i: isinstance(i[1].buildstream_object, CasBasedDirectory),
 | 
|  | 762 | +                                self.index.items())
 | 
|  | 763 | +
 | 
|  | 764 | +        # We need to mimic the behaviour of os.walk, in which symlinks
 | 
|  | 765 | +        # to directories count as directories and symlinks to file or
 | 
|  | 766 | +        # broken symlinks count as files. os.walk doesn't follow
 | 
|  | 767 | +        # symlinks, so we don't recurse.
 | 
|  | 768 | +        for (k, v) in sorted(symlink_list):
 | 
|  | 769 | +            target = self._resolve(k, absolute_symlinks_resolve=True)
 | 
|  | 770 | +            if isinstance(target, CasBasedDirectory):
 | 
|  | 771 | +                yield os.path.join(relpath, k)
 | 
|  | 772 | +            else:
 | 
|  | 773 | +                file_list.append((k, v))
 | 
|  | 774 | +
 | 
|  | 775 | +        if file_list == [] and relpath != "":
 | 
|  | 776 | +            yield relpath
 | 
|  | 777 | +        else:
 | 
|  | 778 | +            for (k, v) in sorted(file_list):
 | 
|  | 779 | +                yield os.path.join(relpath, k)
 | 
|  | 780 | +
 | 
|  | 781 | +        for (k, v) in sorted(directory_list):
 | 
|  | 782 | +            yield from v.buildstream_object.list_relative_paths(relpath=os.path.join(relpath, k))
 | 
| 544 | 783 |  
 | 
| 545 | 784 |      def recalculate_hash(self):
 | 
| 546 | 785 |          """ Recalculates the hash for this directory and stores the results in
 |