Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream
Commits:
-
a1dc2acd
by Jim MacArthur at 2018-12-05T17:34:45Z
-
aecd7458
by Jim MacArthur at 2018-12-05T17:34:45Z
-
3de81d0b
by Jim MacArthur at 2018-12-05T17:34:45Z
-
11398076
by Jim MacArthur at 2018-12-05T17:34:45Z
-
4400c6e2
by Jim MacArthur at 2018-12-05T17:34:45Z
-
de0d2bde
by Jim MacArthur at 2018-12-05T17:34:45Z
-
7fde6bfd
by Jim MacArthur at 2018-12-05T17:34:45Z
-
d2f5827e
by Jim MacArthur at 2018-12-05T17:34:45Z
5 changed files:
- buildstream/element.py
- buildstream/storage/_casbaseddirectory.py
- buildstream/storage/_filebaseddirectory.py
- buildstream/storage/directory.py
- tests/storage/virtual_directory_import.py
Changes:
... | ... | @@ -1638,9 +1638,7 @@ class Element(Plugin): |
1638 | 1638 |
collectvdir = None
|
1639 | 1639 |
|
1640 | 1640 |
assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
|
1641 |
- filesvdir = assemblevdir.descend("files", create=True)
|
|
1642 | 1641 |
logsvdir = assemblevdir.descend("logs", create=True)
|
1643 |
- buildtreevdir = assemblevdir.descend("buildtree", create=True)
|
|
1644 | 1642 |
metavdir = assemblevdir.descend("meta", create=True)
|
1645 | 1643 |
|
1646 | 1644 |
# Create artifact directory structure
|
... | ... | @@ -1652,14 +1650,14 @@ class Element(Plugin): |
1652 | 1650 |
os.mkdir(metadir)
|
1653 | 1651 |
|
1654 | 1652 |
if collect is not None and collectvdir is not None:
|
1655 |
- filesvdir.import_files(collectvdir, can_link=True)
|
|
1656 |
- |
|
1653 |
+ assemblevdir.fast_directory_import("files", collectvdir)
|
|
1657 | 1654 |
try:
|
1658 | 1655 |
sandbox_vroot = sandbox.get_virtual_directory()
|
1659 | 1656 |
sandbox_build_dir = sandbox_vroot.descend(
|
1660 | 1657 |
self.get_variable('build-root').lstrip(os.sep).split(os.sep))
|
1661 |
- buildtreevdir.import_files(sandbox_build_dir)
|
|
1658 |
+ assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
|
|
1662 | 1659 |
except VirtualDirectoryError:
|
1660 |
+ assemblevdir.descend("buildtree", create=True)
|
|
1663 | 1661 |
# Directory could not be found. Pre-virtual
|
1664 | 1662 |
# directory behaviour was to continue silently
|
1665 | 1663 |
# if the directory could not be found.
|
... | ... | @@ -1709,10 +1707,10 @@ class Element(Plugin): |
1709 | 1707 |
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
|
1710 | 1708 |
|
1711 | 1709 |
metavdir.import_files(metadir)
|
1710 |
+ logsvdir.import_files(logsdir)
|
|
1712 | 1711 |
|
1713 |
- with self.timed_activity("Caching artifact"):
|
|
1714 |
- # TODO: I don't know the artifact size! Can we get _get_dir_size to work for vdirs?
|
|
1715 |
- artifact_size = 1024
|
|
1712 |
+ artifact_size = assemblevdir.get_size()
|
|
1713 |
+ with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
|
|
1716 | 1714 |
self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
|
1717 | 1715 |
|
1718 | 1716 |
if collect is not None and collectvdir is None:
|
... | ... | @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory): |
350 | 350 |
filenode.is_executable = is_executable
|
351 | 351 |
self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
|
352 | 352 |
|
353 |
- def _copy_link_from_filesystem(self, basename, filename):
|
|
354 |
- self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
|
|
353 |
+ def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
|
|
354 |
+ """ filesystem_path should be a full path pointing to the source symlink.
|
|
355 |
+ relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
|
|
356 |
+ destination_name should be the destination name in this directory. """
|
|
357 |
+ self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
|
|
355 | 358 |
|
356 |
- def _add_new_link_direct(self, name, target):
|
|
359 |
+ def _add_new_link_direct(self, relative_path, name, target):
|
|
357 | 360 |
existing_link = self._find_pb2_entry(name)
|
358 | 361 |
if existing_link:
|
359 | 362 |
symlinknode = existing_link
|
... | ... | @@ -361,8 +364,16 @@ class CasBasedDirectory(Directory): |
361 | 364 |
symlinknode = self.pb2_directory.symlinks.add()
|
362 | 365 |
assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
|
363 | 366 |
symlinknode.name = name
|
364 |
- # A symlink node has no digest.
|
|
367 |
+ |
|
368 |
+ absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
|
|
369 |
+ if absolute:
|
|
370 |
+ distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
|
|
371 |
+ # TODO: Using os.path.join and _pb2_path_sep in the same place is illogical
|
|
372 |
+ target = os.path.join(CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root), target[1:])
|
|
365 | 373 |
symlinknode.target = target
|
374 |
+ |
|
375 |
+ # A symlink node has no digest.
|
|
376 |
+ |
|
366 | 377 |
self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
|
367 | 378 |
|
368 | 379 |
def delete_entry(self, name):
|
... | ... | @@ -527,7 +538,7 @@ class CasBasedDirectory(Directory): |
527 | 538 |
result.combine(subdir_result)
|
528 | 539 |
elif os.path.islink(import_file):
|
529 | 540 |
if self._check_replacement(entry, path_prefix, result):
|
530 |
- self._copy_link_from_filesystem(source_directory, entry)
|
|
541 |
+ self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
|
|
531 | 542 |
result.files_written.append(relative_pathname)
|
532 | 543 |
elif os.path.isdir(import_file):
|
533 | 544 |
# A plain directory which already exists isn't a problem; just ignore it.
|
... | ... | @@ -600,11 +611,9 @@ class CasBasedDirectory(Directory): |
600 | 611 |
filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
|
601 | 612 |
is_executable=item.is_executable)
|
602 | 613 |
self.index[f] = IndexEntry(filenode, modified=True)
|
603 |
- # TODO: No idea if we actually need this (the digest already existed...)
|
|
604 |
- self.cas_cache.add_object(path=os.path.join(path_prefix, f), digest=item.digest)
|
|
605 | 614 |
else:
|
606 | 615 |
assert isinstance(item, remote_execution_pb2.SymlinkNode)
|
607 |
- self._add_new_link_direct(name=f, target=item.target)
|
|
616 |
+ self._add_new_link_direct(path_prefix, name=f, target=item.target)
|
|
608 | 617 |
else:
|
609 | 618 |
result.ignored.append(os.path.join(path_prefix, f))
|
610 | 619 |
return result
|
... | ... | @@ -838,6 +847,28 @@ class CasBasedDirectory(Directory): |
838 | 847 |
self._recalculate_recursing_up()
|
839 | 848 |
self._recalculate_recursing_down()
|
840 | 849 |
|
850 |
+ def get_size(self):
|
|
851 |
+ total = len(self.pb2_directory.SerializeToString())
|
|
852 |
+ for i in self.index.values():
|
|
853 |
+ if isinstance(i.buildstream_object, CasBasedDirectory):
|
|
854 |
+ total += i.buildstream_object.get_size()
|
|
855 |
+ elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
|
|
856 |
+ src_name = self.cas_cache.objpath(i.pb_object.digest)
|
|
857 |
+ filesize = os.stat(src_name).st_size
|
|
858 |
+ total += filesize
|
|
859 |
+ # Symlink nodes are encoded as part of the directory serialization.
|
|
860 |
+ return total
|
|
861 |
+ |
|
862 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
863 |
+ assert dirname not in self.index
|
|
864 |
+ if isinstance(other_directory, CasBasedDirectory):
|
|
865 |
+ self.index[dirname] = IndexEntry(other_directory.pb_object,
|
|
866 |
+ buildstream_object=other_directory.buildstream_object)
|
|
867 |
+ else:
|
|
868 |
+ # Revert to the old method.
|
|
869 |
+ subdir = self.descend(dirname, create=True)
|
|
870 |
+ subdir.import_files(other_directory, can_link=True)
|
|
871 |
+ |
|
841 | 872 |
def _get_identifier(self):
|
842 | 873 |
path = ""
|
843 | 874 |
if self.parent:
|
... | ... | @@ -125,6 +125,13 @@ class FileBasedDirectory(Directory): |
125 | 125 |
self._mark_changed()
|
126 | 126 |
return import_result
|
127 | 127 |
|
128 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
129 |
+ # We can't do a fast import into a FileBasedDirectory, so this
|
|
130 |
+ # falls back to import_files.
|
|
131 |
+ assert dirname not in self.index
|
|
132 |
+ subdir = self.descend(dirname, create=True)
|
|
133 |
+ subdir.import_files(other_directory, can_link=True)
|
|
134 |
+ |
|
128 | 135 |
def _mark_changed(self):
|
129 | 136 |
self._directory_read = False
|
130 | 137 |
|
... | ... | @@ -99,6 +99,30 @@ class Directory(): |
99 | 99 |
|
100 | 100 |
raise NotImplementedError()
|
101 | 101 |
|
102 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
103 |
+ """Import other_directory as a new directory in this one.
|
|
104 |
+ |
|
105 |
+ This is a potentially faster method than import_files with
|
|
106 |
+ fewer options. dirname must not already exist, and all files
|
|
107 |
+ are imported unconditionally. It is assumed that it is
|
|
108 |
+ acceptable to use filesystem hard links to files in
|
|
109 |
+ other_directory. You cannot update utimes or get a
|
|
110 |
+ FileListResult.
|
|
111 |
+ |
|
112 |
+ This only provides a benefit if both this and other_directory
|
|
113 |
+ are CAS-based directories. In other cases, it will fall back
|
|
114 |
+ to import_files.
|
|
115 |
+ |
|
116 |
+ Args:
|
|
117 |
+ dirname: The name to call the subdirectory in this
|
|
118 |
+ directory. This must not already exist in this directory.
|
|
119 |
+ |
|
120 |
+ other_directory: The directory to import.
|
|
121 |
+ |
|
122 |
+ """
|
|
123 |
+ |
|
124 |
+ raise NotImplementedError()
|
|
125 |
+ |
|
102 | 126 |
def export_files(self, to_directory, *, can_link=False, can_destroy=False):
|
103 | 127 |
"""Copies everything from this into to_directory.
|
104 | 128 |
|
... | ... | @@ -149,10 +149,10 @@ def resolve_symlinks(path, root): |
149 | 149 |
if target.startswith(os.path.sep):
|
150 | 150 |
# Absolute link - relative to root
|
151 | 151 |
location = os.path.join(root, target, tail)
|
152 |
+ return resolve_symlinks(location, root)
|
|
152 | 153 |
else:
|
153 |
- # Relative link - relative to symlink location
|
|
154 |
- location = os.path.join(location, target)
|
|
155 |
- return resolve_symlinks(location, root)
|
|
154 |
+ return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
|
|
155 |
+ |
|
156 | 156 |
# If we got here, no symlinks were found. Add on the final component and return.
|
157 | 157 |
location = os.path.join(location, components[-1])
|
158 | 158 |
return location
|
... | ... | @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents= |
199 | 199 |
pass
|
200 | 200 |
else:
|
201 | 201 |
assert os.path.islink(realpath)
|
202 |
- assert os.readlink(realpath) == content
|
|
202 |
+ # We expect all storage to normalise absolute symlinks.
|
|
203 |
+ depth = len(path.split(os.path.sep)) - 1
|
|
204 |
+ if content.startswith(os.path.sep):
|
|
205 |
+ assert os.readlink(realpath) == os.path.join(os.path.sep.join([".."] * depth), content[1:])
|
|
206 |
+ else:
|
|
207 |
+ assert os.readlink(realpath) == content
|
|
208 |
+ |
|
203 | 209 |
elif typename == 'D':
|
204 | 210 |
# We can't do any more tests than this because it
|
205 | 211 |
# depends on things present in the original. Blank
|