From c83f6f17866a3d114c174ba282e4dff2fd9996f7 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Sun, 18 Nov 2018 21:27:42 -0800 Subject: [PATCH] Use ThreadPool for cache fetching and rust tar for artifact extration (#6748) ### Problem https://github.com/pantsbuild/pants/blob/11aa11f2a3abc865e32a9f513abed4a3877b9272/src/python/pants/cache/restful_artifact_cache.py#L75 would hang and thus never exiting the multiprocessing pool. The closest issue I can find is https://stackoverflow.com/questions/25943923/requests-get-hangs-when-called-in-a-multiprocessing-pool which doesn't have any solution. I was able to repro the issue almost consistently with PyCharm debug mode with ``` clean-all compile --cache-read-from="https://dummy_url/" examples/tests/java/org/pantsbuild/example/hello/greet/: ... [zinc] [cache] (stuck here) ``` and the issue seems to be resolved by using `ThreadPool`. # Solution Since most of the untar work was done in Python and was CPU bound, we chose to move that to rust for the performance. --- .../pants/backend/jvm/tasks/jvmdoc_gen.py | 5 +- src/python/pants/base/worker_pool.py | 10 +- src/python/pants/cache/BUILD | 1 + src/python/pants/cache/artifact.py | 37 ++------ src/python/pants/cache/cache_setup.py | 3 + src/python/pants/engine/native.py | 6 ++ src/rust/engine/Cargo.lock | 85 +++++++++++++++++ src/rust/engine/Cargo.toml | 5 +- src/rust/engine/src/lib.rs | 28 ++++++ src/rust/engine/tar_api/Cargo.toml | 15 +++ src/rust/engine/tar_api/src/tar_api.rs | 93 +++++++++++++++++++ tests/python/pants_test/cache/BUILD | 2 + .../python/pants_test/cache/test_artifact.py | 29 +++++- .../pants_test/cache/test_artifact_cache.py | 11 ++- 14 files changed, 284 insertions(+), 46 deletions(-) create mode 100644 src/rust/engine/tar_api/Cargo.toml create mode 100644 src/rust/engine/tar_api/src/tar_api.rs diff --git a/src/python/pants/backend/jvm/tasks/jvmdoc_gen.py b/src/python/pants/backend/jvm/tasks/jvmdoc_gen.py index 1bb945879ab..76096b1544a 100644 --- a/src/python/pants/backend/jvm/tasks/jvmdoc_gen.py +++ b/src/python/pants/backend/jvm/tasks/jvmdoc_gen.py @@ -9,6 +9,7 @@ import multiprocessing import os import re +from multiprocessing.pool import ThreadPool from pants.backend.jvm.tasks.jvm_task import JvmTask from pants.base.exceptions import TaskError @@ -157,8 +158,10 @@ def _generate_individual(self, targets, create_jvmdoc_command): jobs[gendir] = (target, command) if jobs: + # Use ThreadPool as there may be dangling processes that cause identical run id and + # then buildstats error downstream. https://github.com/pantsbuild/pants/issues/6785 with contextlib.closing( - multiprocessing.Pool(processes=min(len(jobs), multiprocessing.cpu_count()))) as pool: + ThreadPool(processes=min(len(jobs), multiprocessing.cpu_count()))) as pool: # map would be a preferable api here but fails after the 1st batch with an internal: # ... # File "...src/python/pants/backend/jvm/tasks/jar_create.py", line 170, in javadocjar diff --git a/src/python/pants/base/worker_pool.py b/src/python/pants/base/worker_pool.py index 100362200c0..3c843bb7a59 100644 --- a/src/python/pants/base/worker_pool.py +++ b/src/python/pants/base/worker_pool.py @@ -5,8 +5,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals import multiprocessing -import signal -import sys import threading from builtins import next, object from multiprocessing.pool import ThreadPool @@ -192,11 +190,6 @@ class SubprocPool(object): _lock = threading.Lock() _num_processes = multiprocessing.cpu_count() - @staticmethod - def worker_init(): - # Exit quietly on sigint, otherwise we get {num_procs} keyboardinterrupt stacktraces spewn - signal.signal(signal.SIGINT, lambda *args: sys.exit()) - @classmethod def set_num_processes(cls, num_processes): cls._num_processes = num_processes @@ -205,8 +198,7 @@ def set_num_processes(cls, num_processes): def foreground(cls): with cls._lock: if cls._pool is None: - cls._pool = multiprocessing.Pool(processes=cls._num_processes, - initializer=SubprocPool.worker_init) + cls._pool = ThreadPool(processes=cls._num_processes) return cls._pool @classmethod diff --git a/src/python/pants/cache/BUILD b/src/python/pants/cache/BUILD index 7b09d7b11dc..e2eaa39b498 100644 --- a/src/python/pants/cache/BUILD +++ b/src/python/pants/cache/BUILD @@ -9,6 +9,7 @@ python_library( '3rdparty/python:six', 'src/python/pants/base:deprecated', 'src/python/pants/base:validation', + 'src/python/pants/engine:native', 'src/python/pants/option', 'src/python/pants/subsystem', 'src/python/pants/util:contextutil', diff --git a/src/python/pants/cache/artifact.py b/src/python/pants/cache/artifact.py index ae005f38615..a4c6be835fb 100644 --- a/src/python/pants/cache/artifact.py +++ b/src/python/pants/cache/artifact.py @@ -4,11 +4,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import errno import os import shutil -import tarfile -from builtins import object, str from pants.util.contextutil import open_tar from pants.util.dirutil import safe_mkdir, safe_mkdir_for, safe_walk @@ -83,6 +80,8 @@ def extract(self): class TarballArtifact(Artifact): """An artifact stored in a tarball.""" + NATIVE_BINARY = None + # TODO: Expose `dereference` for tasks. # https://github.com/pantsbuild/pants/issues/3961 def __init__(self, artifact_root, tarfile_, compression=9, dereference=True): @@ -109,30 +108,10 @@ def collect(self, paths): self._relpaths.add(relpath) def extract(self): + # Note(yic): unlike the python implementation before, now we do not update self._relpath + # after the extraction. try: - with open_tar(self._tarfile, 'r', errorlevel=2) as tarin: - # Note: We create all needed paths proactively, even though extractall() can do this for us. - # This is because we may be called concurrently on multiple artifacts that share directories, - # and there will be a race condition inside extractall(): task T1 A) sees that a directory - # doesn't exist and B) tries to create it. But in the gap between A) and B) task T2 creates - # the same directory, so T1 throws "File exists" in B). - # This actually happened, and was very hard to debug. - # Creating the paths here up front allows us to squelch that "File exists" error. - paths = [] - dirs = set() - for tarinfo in tarin.getmembers(): - paths.append(tarinfo.name) - if tarinfo.isdir(): - dirs.add(tarinfo.name) - else: - dirs.add(os.path.dirname(tarinfo.name)) - for d in dirs: - try: - os.makedirs(os.path.join(self._artifact_root, d)) - except OSError as e: - if e.errno != errno.EEXIST: - raise - tarin.extractall(self._artifact_root) - self._relpaths.update(paths) - except tarfile.ReadError as e: - raise ArtifactError(str(e)) + self.NATIVE_BINARY.decompress_tarball(self._tarfile.encode('utf-8'), + self._artifact_root.encode('utf-8')) + except Exception as e: + raise ArtifactError("Extracting artifact failed:\n{}".format(e)) diff --git a/src/python/pants/cache/cache_setup.py b/src/python/pants/cache/cache_setup.py index 2aa1a369d8c..1d10f6cc732 100644 --- a/src/python/pants/cache/cache_setup.py +++ b/src/python/pants/cache/cache_setup.py @@ -12,6 +12,7 @@ from future.moves.urllib.parse import urlparse from pants.base.build_environment import get_buildroot +from pants.cache.artifact import TarballArtifact from pants.cache.artifact_cache import ArtifactCacheError from pants.cache.local_artifact_cache import LocalArtifactCache, TempLocalArtifactCache from pants.cache.pinger import BestUrlSelector, Pinger @@ -134,6 +135,8 @@ def __init__(self, options, log, task, pinger=None, resolver=None): else: self._resolver = NoopResolver() + TarballArtifact.NATIVE_BINARY = task.context._scheduler._scheduler._native + @staticmethod def make_task_cache_dirname(task): """Use the task fingerprint as the name of the cache subdirectory to store diff --git a/src/python/pants/engine/native.py b/src/python/pants/engine/native.py index 6c40338f01a..b78091b4ceb 100644 --- a/src/python/pants/engine/native.py +++ b/src/python/pants/engine/native.py @@ -252,6 +252,8 @@ void lease_files_in_graph(Scheduler*); void garbage_collect_store(Scheduler*); + +PyResult decompress_tarball(char*, char*); ''' CFFI_EXTERNS = ''' @@ -787,6 +789,10 @@ def buffer(self, cdata): def to_ids_buf(self, types): return self.context.type_ids_buf([TypeId(self.context.to_id(t)) for t in types]) + def decompress_tarball(self, tarfile_path, dest_dir): + result = self.lib.decompress_tarball(tarfile_path, dest_dir) + return self.context.raise_or_return(result) + def new_tasks(self): return self.gc(self.lib.tasks_create(), self.lib.tasks_destroy) diff --git a/src/rust/engine/Cargo.lock b/src/rust/engine/Cargo.lock index 29a0fc89389..28bbf9b6e9f 100644 --- a/src/rust/engine/Cargo.lock +++ b/src/rust/engine/Cargo.lock @@ -370,6 +370,7 @@ dependencies = [ "reqwest 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "resettable 0.0.1", "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", + "tar_api 0.0.1", "tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "tokio 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "ui 0.0.1", @@ -432,11 +433,31 @@ name = "fake-simd" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "filetime" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fixedbitset" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "flate2" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "fnv" version = "1.0.6" @@ -905,6 +926,34 @@ dependencies = [ "unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "miniz-sys" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "miniz_oxide" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "miniz_oxide_c_api" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "mio" version = "0.6.16" @@ -1599,6 +1648,27 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "tar" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "filetime 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tar_api" +version = "0.0.1" +dependencies = [ + "flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "tar 0.4.20 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "testutil 0.0.1", +] + [[package]] name = "tempfile" version = "3.0.4" @@ -2060,6 +2130,14 @@ dependencies = [ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "xattr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c" "checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" @@ -2104,7 +2182,9 @@ dependencies = [ "checksum failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6dd377bcc1b1b7ce911967e3ec24fa19c3224394ec05b54aa7b083d498341ac7" "checksum failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64c2d913fe8ed3b6c6518eedf4538255b989945c14c2a7d5cbff62a5e2120596" "checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +"checksum filetime 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b3ea0c97183a611b1673e5e28b160d7e1035106cad053c988aae3bbd996fdcce" "checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" +"checksum flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3b0c7353385f92079524de3b7116cf99d73947c08a7472774e9b3b04bff3b901" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" "checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" @@ -2149,6 +2229,9 @@ dependencies = [ "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" "checksum mime 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0a907b83e7b9e987032439a387e187119cddafc92d5c2aaeb1d92580a793f630" "checksum mime_guess 2.0.0-alpha.6 (registry+https://github.com/rust-lang/crates.io-index)" = "30de2e4613efcba1ec63d8133f344076952090c122992a903359be5a4f99c3ed" +"checksum miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0300eafb20369952951699b68243ab4334f4b10a88f411c221d444b36c40e649" +"checksum miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ad30a47319c16cde58d0314f5d98202a80c9083b5f61178457403dfb14e509c" +"checksum miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "28edaef377517fd9fe3e085c37d892ce7acd1fbeab9239c5a36eec352d8a8b7e" "checksum mio 0.6.16 (registry+https://github.com/rust-lang/crates.io-index)" = "71646331f2619b1026cc302f87a2b8b648d5c6dd6937846a16cc8ce0f347f432" "checksum mio-named-pipes 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "f5e374eff525ce1c5b7687c4cef63943e7686524a387933ad27ca7ec43779cb3" "checksum mio-uds 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "966257a94e196b11bb43aca423754d87429960a768de9414f3691d6957abf125" @@ -2223,6 +2306,7 @@ dependencies = [ "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum syn 0.15.20 (registry+https://github.com/rust-lang/crates.io-index)" = "8886c8d2774e853fcd7d9d2131f6e40ba46c9c0e358e4d57178452abd6859bb0" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" +"checksum tar 0.4.20 (registry+https://github.com/rust-lang/crates.io-index)" = "a303ba60a099fcd2aaa646b14d2724591a96a75283e4b7ed3d1a1658909d9ae2" "checksum tempfile 3.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "55c1195ef8513f3273d55ff59fe5da6940287a0d7a98331254397f464833675b" "checksum termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4096add70612622289f2fdcdbd5086dc81c1e2675e6ae58d6c4f62a16c6d7f2f" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" @@ -2273,3 +2357,4 @@ dependencies = [ "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" "checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba" "checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" +"checksum xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" diff --git a/src/rust/engine/Cargo.toml b/src/rust/engine/Cargo.toml index 94e59695353..936c3bdad83 100644 --- a/src/rust/engine/Cargo.toml +++ b/src/rust/engine/Cargo.toml @@ -37,6 +37,7 @@ members = [ "process_execution/bazel_protos", "process_executor", "resettable", + "tar_api", "testutil", "testutil/mock", "testutil/local_cas", @@ -63,6 +64,7 @@ default-members = [ "process_execution/bazel_protos", "process_executor", "resettable", + "tar_api", "testutil", "testutil/mock", "testutil/local_cas", @@ -77,6 +79,7 @@ fs = { path = "fs" } futures = "^0.1.16" graph = { path = "graph" } hashing = { path = "hashing" } +indexmap = "1.0.2" itertools = "0.7.2" lazy_static = "1" log = "0.4" @@ -90,4 +93,4 @@ tokio = "0.1" tempfile = "3" ui = { path = "ui" } url = "1.7.1" -indexmap = "1.0.2" +tar_api = { path = "tar_api" } diff --git a/src/rust/engine/src/lib.rs b/src/rust/engine/src/lib.rs index cb2db489ce5..c28740e5dd2 100644 --- a/src/rust/engine/src/lib.rs +++ b/src/rust/engine/src/lib.rs @@ -64,6 +64,7 @@ extern crate process_execution; extern crate reqwest; extern crate resettable; extern crate smallvec; +extern crate tar_api; extern crate tempfile; extern crate tokio; extern crate ui; @@ -523,6 +524,33 @@ pub extern "C" fn graph_len(scheduler_ptr: *mut Scheduler) -> u64 { with_scheduler(scheduler_ptr, |scheduler| scheduler.core.graph.len() as u64) } +#[no_mangle] +pub extern "C" fn decompress_tarball( + tar_path: *const raw::c_char, + output_dir: *const raw::c_char, +) -> PyResult { + let tar_path_str = PathBuf::from( + unsafe { CStr::from_ptr(tar_path) } + .to_string_lossy() + .into_owned(), + ); + let output_dir_str = PathBuf::from( + unsafe { CStr::from_ptr(output_dir) } + .to_string_lossy() + .into_owned(), + ); + + tar_api::decompress_tgz(tar_path_str.as_path(), output_dir_str.as_path()) + .map_err(|e| { + format!( + "Failed to untar {:?} to {:?}:\n{:?}", + tar_path_str.as_path(), + output_dir_str.as_path(), + e + ) + }).into() +} + #[no_mangle] pub extern "C" fn graph_visualize( scheduler_ptr: *mut Scheduler, diff --git a/src/rust/engine/tar_api/Cargo.toml b/src/rust/engine/tar_api/Cargo.toml new file mode 100644 index 00000000000..d1bbf0b9429 --- /dev/null +++ b/src/rust/engine/tar_api/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "tar_api" +version = "0.0.1" +authors = [ "Pants Build " ] + +[lib] +path = "src/tar_api.rs" + +[dependencies] +flate2 = "1.0" +tar = "0.4.20" + +[dev-dependencies] +tempfile = "3" +testutil = { path = "../testutil" } \ No newline at end of file diff --git a/src/rust/engine/tar_api/src/tar_api.rs b/src/rust/engine/tar_api/src/tar_api.rs new file mode 100644 index 00000000000..111ba012c1b --- /dev/null +++ b/src/rust/engine/tar_api/src/tar_api.rs @@ -0,0 +1,93 @@ +extern crate flate2; +extern crate tar; +#[cfg(test)] +extern crate tempfile; +#[cfg(test)] +extern crate testutil; + +use flate2::read::GzDecoder; +use std::fs::File; +use std::path::Path; +use tar::Archive; + +pub fn decompress_tgz(tar_path: &Path, output_dir: &Path) -> Result<(), std::io::Error> { + let tar_gz = File::open(tar_path)?; + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + archive.unpack(output_dir)?; + Ok(()) +} + +#[cfg(test)] +pub mod tar_tests { + use super::decompress_tgz; + use flate2::write::GzEncoder; + use flate2::Compression; + use std::fs::File; + use std::path::{Path, PathBuf}; + use tempfile::TempDir; + use testutil::file::contents; + use testutil::make_file; + + #[test] + fn decompress_normal_tar_file() { + // prepare a file containing 'hello world' + let tmp_dir = TempDir::new().unwrap(); + let content = "hello world".as_bytes().to_vec(); + let txt_full_path = std::fs::canonicalize(tmp_dir.path()) + .unwrap() + .join(&"hello.txt"); + make_file(&txt_full_path, &content, 0o600); + + let path_in_tar = "a/b/c/d.txt"; + let tgz_path = std::fs::canonicalize(tmp_dir.path()) + .unwrap() + .join(&"simple.tgz"); + + compress(&txt_full_path, &path_in_tar, &tgz_path).expect("Error compressing."); + + // uncompress the tgz then make sure the content is good. + let tmp_dest_dir = tempfile::TempDir::new().unwrap(); + decompress_tgz(&tgz_path.as_path(), &tmp_dest_dir.path()).expect("Error decompressing."); + let expected_txt_path = std::fs::canonicalize(tmp_dest_dir.path()) + .unwrap() + .join(&path_in_tar); + assert!(expected_txt_path.exists()); + assert_eq!(content, contents(&expected_txt_path)) + } + + #[test] + fn decompress_invalid_tar_file_path() { + let result = decompress_tgz(&PathBuf::from("invalid_tar_path"), &PathBuf::from("a_dir")); + assert!(result.is_err()) + } + + #[test] + fn decompress_invalid_tar_content() { + let dir = tempfile::TempDir::new().unwrap(); + let tar_filename = PathBuf::from("marmosets"); + let content = "definitely not a valid tar".as_bytes().to_vec(); + let tar_path = std::fs::canonicalize(dir.path()) + .unwrap() + .join(&tar_filename); + make_file(&tar_path, &content, 0o600); + let result = decompress_tgz(&tar_path, &PathBuf::from("a_dir")); + assert!(result.is_err()) + } + + fn compress( + txt_full_path: &Path, + path_in_tar: &str, + tar_full_path: &Path, + ) -> Result<(), std::io::Error> { + // compress that file into a/b/c/hello.txt in the tar + let enc = GzEncoder::new( + File::create(tar_full_path.clone()).unwrap(), + Compression::default(), + ); + let mut tar = tar::Builder::new(enc); + tar.append_file(path_in_tar, &mut File::open(txt_full_path.clone()).unwrap())?; + tar.into_inner()?; + Ok(()) + } +} diff --git a/tests/python/pants_test/cache/BUILD b/tests/python/pants_test/cache/BUILD index ac101f791ca..3e00aba40ef 100644 --- a/tests/python/pants_test/cache/BUILD +++ b/tests/python/pants_test/cache/BUILD @@ -8,6 +8,7 @@ python_tests( 'src/python/pants/cache', 'src/python/pants/util:contextutil', 'src/python/pants/util:dirutil', + 'tests/python/pants_test:test_base', ] ) @@ -21,6 +22,7 @@ python_tests( 'src/python/pants/invalidation', 'src/python/pants/util:contextutil', 'src/python/pants/util:dirutil', + 'tests/python/pants_test:test_base', ] ) diff --git a/tests/python/pants_test/cache/test_artifact.py b/tests/python/pants_test/cache/test_artifact.py index 14f3a270706..fcbcad373bc 100644 --- a/tests/python/pants_test/cache/test_artifact.py +++ b/tests/python/pants_test/cache/test_artifact.py @@ -7,12 +7,20 @@ import os import unittest -from pants.cache.artifact import DirectoryArtifact, TarballArtifact +from pants.cache.artifact import ArtifactError, DirectoryArtifact, TarballArtifact from pants.util.contextutil import temporary_dir from pants.util.dirutil import safe_mkdir, safe_open +from pants_test.test_base import TestBase -class TarballArtifactTest(unittest.TestCase): +class TarballArtifactTest(TestBase): + + def setUp(self): + super(TarballArtifactTest, self).setUp() + # Init engine because decompression now goes through native code. + self._init_engine() + TarballArtifact.NATIVE_BINARY = self._scheduler._scheduler._native + def test_get_paths_after_collect(self): with temporary_dir() as tmpdir: artifact_root = os.path.join(tmpdir, 'artifacts') @@ -48,10 +56,23 @@ def test_exists_true_when_exists(self): self.assertTrue(artifact.exists()) - def touch_file_in(self, artifact_root): + def test_non_existent_tarball_extraction(self): + with temporary_dir() as tmpdir: + artifact = TarballArtifact(artifact_root=tmpdir, tarfile_='vapor.tar') + with self.assertRaises(ArtifactError): + artifact.extract() + + def test_corrupt_tarball_extraction(self): + with temporary_dir() as tmpdir: + path = self.touch_file_in(tmpdir, content='invalid') + artifact = TarballArtifact(artifact_root=tmpdir, tarfile_=path) + with self.assertRaises(ArtifactError): + artifact.extract() + + def touch_file_in(self, artifact_root, content=''): path = os.path.join(artifact_root, 'some.file') with safe_open(path, 'w') as f: - f.write('') + f.write(content) return path diff --git a/tests/python/pants_test/cache/test_artifact_cache.py b/tests/python/pants_test/cache/test_artifact_cache.py index 73bae63ab2d..94675f36798 100644 --- a/tests/python/pants_test/cache/test_artifact_cache.py +++ b/tests/python/pants_test/cache/test_artifact_cache.py @@ -5,10 +5,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os -import unittest from builtins import open, str from contextlib import contextmanager +from pants.cache.artifact import TarballArtifact from pants.cache.artifact_cache import (NonfatalArtifactCacheError, call_insert, call_use_cached_files) from pants.cache.local_artifact_cache import LocalArtifactCache, TempLocalArtifactCache @@ -18,13 +18,14 @@ from pants.util.contextutil import temporary_dir, temporary_file, temporary_file_path from pants.util.dirutil import safe_mkdir from pants_test.cache.cache_server import cache_server +from pants_test.test_base import TestBase TEST_CONTENT1 = b'muppet' TEST_CONTENT2 = b'kermit' -class TestArtifactCache(unittest.TestCase): +class TestArtifactCache(TestBase): @contextmanager def setup_local_cache(self): with temporary_dir() as artifact_root: @@ -52,6 +53,12 @@ def setup_test_file(self, parent): f.close() yield path + def setUp(self): + super(TestArtifactCache, self).setUp() + # Init engine because decompression now goes through native code. + self._init_engine() + TarballArtifact.NATIVE_BINARY = self._scheduler._scheduler._native + def test_local_cache(self): with self.setup_local_cache() as artifact_cache: self.do_test_artifact_cache(artifact_cache)