diff --git a/benchmarks/performance_test.py b/benchmarks/performance_test.py new file mode 100644 index 0000000000..31a027505b --- /dev/null +++ b/benchmarks/performance_test.py @@ -0,0 +1,505 @@ +# Copyright (c) 2015-2024 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . +"""Base class for performance test.""" +import csv +import glob +import os +import platform +import time +from datetime import datetime, timezone +from io import BytesIO +from itertools import zip_longest +from threading import Thread + +import cpuinfo +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import psutil + +OS_TYPE = platform.system() +OS_MEMORY_KEY_MAP = { + "Windows": "Memory Usage (Physical + PageFile)", + "Linux": "Memory Usage (Physical + Swap)" +} + +MEMORY_KEY = OS_MEMORY_KEY_MAP.get(OS_TYPE, "Memory Usage") +FIGURES = {"Process Time (single scene average)": {"key_y": "Time (s)", "colors": ["#4E79A7"]}, + "Average CPU Usage": {"key_y": "Avg CPU (%)", "colors": ["#F28E2B"]}, + MEMORY_KEY: {"key_y": ["Avg Memory (GB)", "Max Memory (GB)"], "colors": ["#59A14F", "#EDC948"]}} + + +class SatpyPerformanceTest: + """Test satpy performance by looping through conditions involving ``dask_array_chunk_size``and ``dask_num_workers``. + + There are two tests: ``simple_test`` and ``resampler_test``. See below for details. + + """ + def __init__(self, work_dir, folder_pattern, reader_name, composite, chunk_size_opts, worker_opts, + reader_kwargs=None): + """Initialize SatpyPerformanceTest with some basic arguments. + + Args: + work_dir (str): Absolute path to the base directory that contains all your dataset folders. + folder_pattern (str): Naming scheme of the dataset folders, e.g. `G16_s*_e*_FLDK`. + This will be used for ``glob.glob`` to search the datasets. + reader_name (str): Reader you want to test. + composite (str): Composite for test. + chunk_size_opts (list): All the ``dask_array_chunk_size`` values you wish for, in `MiB`. + worker_opts (list): All the ``dask_num_workers`` values you wish for. + reader_kwargs (dict): Additional reader arguments for ``Scene``, + like ``{'mask_saturated': False}`` in modis_l1b. + + """ + super().__init__() + self.work_dir = work_dir + self.folder_pattern = folder_pattern + self.reader_name = reader_name + self.reader_kwargs = reader_kwargs + self.composite = composite + + self.folders = glob.glob(f"{self.work_dir}/{self.folder_pattern}") + + self.chunk_size_opts = chunk_size_opts + self.worker_opts = worker_opts + + self.result = {} + self.running = True + + def monitor_system_usage(self, interval=0.5): + """Use psutil to record CPU and memory usage. 
Default sample rate is 0.5s.""" + process = psutil.Process() + cpu_usage = [] + memory_usage = [] + timestamps = [] + + start_time = time.time() + while self.running: + cpu_usage.append(process.cpu_percent()) + if OS_TYPE == "Windows": + # In Windows, "vms" means "pagefile" + memory_usage.append((process.memory_full_info().rss + process.memory_full_info().vms)) + elif OS_TYPE == "Linux": + memory_usage.append((process.memory_full_info().rss + process.memory_full_info().swap)) + else: + memory_usage.append(process.memory_full_info().rss) + timestamps.append(time.time() - start_time) + time.sleep(interval) + + self.result["cpu_usage"] = cpu_usage + self.result["memory_usage"] = memory_usage + self.result["timestamps"] = timestamps + + def write_to_csv(self, file_name): + """Write the result of each round to a csv file.""" + with open(file_name, "w", newline="", encoding="utf-8") as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(["Timestamp (s)", "CPU Usage (%)", "Memory Usage (Byte)", "Process Time", "Scenes", + "Errors"]) + for ts, cpu, mem, pt, scn, er in zip_longest(self.result["timestamps"], self.result["cpu_usage"], + self.result["memory_usage"], self.result["process_time"], + self.result["scenes"], self.result["errors"], fillvalue="N/A"): + csvwriter.writerow([ts, cpu, mem, pt, scn, er]) + + def satpy_test(self, resampler, generate=False, area_def=None, resampler_kwargs=None): + """Call satpy to do the test.""" + from satpy import Scene, find_files_and_readers + + reader_kwargs = {} if self.reader_kwargs is None else self.reader_kwargs + resampler_kwargs = {} if resampler_kwargs is None else resampler_kwargs + for folder in self.folders: + files = find_files_and_readers(base_dir=folder, reader=self.reader_name) + scn = Scene(filenames=files, reader_kwargs=reader_kwargs) + scn.load([self.composite], generate=generate) + + if resampler == "none": + scn2 = scn + else: + scn2 = scn.resample(area_def, resampler=resampler, **resampler_kwargs) + + scn2.save_dataset(self.composite, writer="geotiff", filename="test.tif", base_dir=self.work_dir, + fill_value=0, compress=None) + + def single_loop(self, conditions, generate=False, area_def=None, resampler_kwargs=None): + """Single round of the test.""" + import dask.config + self.running = True + + chunk_size, num_worker, resampler = conditions + + dask.config.set({"array.chunk-size": f"{chunk_size}MiB"}) + dask.config.set(num_workers=num_worker) + + try: + num_thread = os.environ["OMP_NUM_THREADS"] + except KeyError: + num_thread = psutil.cpu_count(logical=True) + + print(f"Start testing CHUNK_SIZE={chunk_size}MiB, NUM_WORKER={num_worker}, NUM_THREADS={num_thread}, " # noqa + f"""resampler is "{resampler}".""") + + # Start recording cpu/mem usage + monitor_thread = Thread(target=self.monitor_system_usage, args=(0.5,)) + monitor_thread.start() + + errors = [] + start = time.perf_counter() + try: + self.satpy_test(resampler, generate, area_def, resampler_kwargs) + except Exception as e: + errors.append(e) + + end = time.perf_counter() + # All of these must be list object + self.result["process_time"] = [end - start] + self.result["scenes"] = [len(self.folders)] + self.result["errors"] = errors + + # Stop recording + self.running = False + monitor_thread.join() + + if area_def is None: + area = "original" + else: + area = "local" if len(area_def.area_id) == 0 else area_def.area_id.replace("_", "") + + csv_file = (f"{self.work_dir}/{self.reader_name.replace('_', '')}_" + 
f"chunk{chunk_size}_worker{num_worker}_thread{num_thread}_{area}_{resampler}.csv") + self.write_to_csv(csv_file) + + def simple_test(self, diff_res=False): + """Test the reader in dataset's original projection, with no resampling or the simplest ``native`` resampling. + + Args: + diff_res (bool): If the composite requires bands in different resolutions, this should be set to `True` + so the native resampler will match them to the ``scn.finest_area()``. + For example, ``true_color`` of ABI needs 500m C01 and 1000m C02 bands, so it's `True`. + **This is not a test option and should be set properly according to the composite,** + otherwise the test will end up in errors. + + """ + resampler = "native" if diff_res else "none" + generate = not diff_res + total_rounds = len(self.chunk_size_opts) * len(self.worker_opts) + + print(f"{self.reader_name} test started. Composite is \"{self.composite}\".\n") # noqa + i = 0 + for chunk_size in self.chunk_size_opts: + for num_worker in self.worker_opts: + self.single_loop((chunk_size, num_worker, resampler), generate=generate) + i = i + 1 + + if i == total_rounds: + print(f"ROUND {i} / {total_rounds} Completed. Generating HTML report.") # noqa + html_report(self.work_dir, self.reader_name) + else: + print(f"ROUND {i} / {total_rounds} Completed. Now take a 1-min rest.\n") # noqa + time.sleep(60) + + def resampler_test(self, resamplers, area_def, resampler_kwargs=None): + """Test the reader with resampling. + + Args: + resamplers (list or str): A single resampling algorithm or a list of resampling algorithms you want to test. + area_def (AreaDefinition or DynamicAreaDefinition or str): Area definition or the name of an area stored + in YAML. + resampler_kwargs (dict): Additional arguments passed to the resampler. You can specify the separate + kwargs for each resampler, e.g. + ``{'bilinear': {'cache_dir': '/path/to/my/cache'}, + 'ewa': {'weight_delta_max': 40, 'weight_distance_max': 2}}``. + Or you can just give general kwargs like ``{'cache_dir': '/path/to/my/cache'}`` for + both ``nearest`` and ``bilinear``. + + """ + resamplers = [resamplers] if not isinstance(resamplers, list) else resamplers + resampler_kwargs = {} if resampler_kwargs is None else resampler_kwargs + total_rounds = len(self.chunk_size_opts) * len(self.worker_opts) * len(resamplers) + + print(f"{self.reader_name} test started. Composite is \"{self.composite}\".\n") # noqa + i = 0 + for chunk_size in self.chunk_size_opts: + for num_worker in self.worker_opts: + for resampler in resamplers: + try: + single_resampler_kwargs = resampler_kwargs[resampler] + except KeyError: + single_resampler_kwargs = resampler_kwargs + + self.single_loop((chunk_size, num_worker, resampler), + area_def=area_def, resampler_kwargs=single_resampler_kwargs) + i = i + 1 + + if i == total_rounds: + print(f"ROUND {i} / {total_rounds} Completed. Generating HTML report.") # noqa + html_report(self.work_dir, self.reader_name) + else: + print(f"ROUND {i} / {total_rounds} Completed. 
Now take a 1-min rest.\n") # noqa
+                        time.sleep(60)
+
+
+def process_csv(csv_file):
+    """Process a result csv file and return a dataframe."""
+    # Extract information from the filename
+    filename = os.path.basename(csv_file)
+    filename = filename.split(".")[0]
+    filename_parts = filename.split("_")
+    dask_array_chunk_size = int(filename_parts[1].replace("chunk", ""))
+    dask_num_workers = int(filename_parts[2].replace("worker", ""))
+    omp_num_threads = int(filename_parts[3].replace("thread", ""))
+    area = filename_parts[4]
+    resampling_alg = filename_parts[5]
+
+    data = pd.read_csv(csv_file, keep_default_na=False)
+    scenes = int(data.loc[0, "Scenes"])
+    cpu_thread = psutil.cpu_count(logical=True)
+
+    # Prepare the row dictionary for the summary dataframe based on the filename
+    new_row = {
+        "Dask Array Chunk Size (MB)": dask_array_chunk_size,
+        "Dask Num Workers": dask_num_workers,
+        "Omp Num Threads": omp_num_threads,
+        "Area": area,
+        "Resampling Algorithm": resampling_alg,
+        "Time (s)": round(float(data.loc[0, "Process Time"]) / scenes, 2),
+        "Avg Memory (GB)": round(data["Memory Usage (Byte)"].mean() / (1024 ** 3), 2),
+        "Max Memory (GB)": round(data["Memory Usage (Byte)"].max() / (1024 ** 3), 2),
+        "Avg CPU (%)": round(data["CPU Usage (%)"].mean() / cpu_thread, 2),
+        "Scenes": scenes,
+        "Errors": data.loc[0, "Errors"],
+    }
+
+    df = pd.DataFrame([new_row])
+
+    return df
+
+
+def combined_csv(work_dir, reader_name):
+    """Collect all the csv files under ``work_dir`` and merge them into one dataframe."""
+    all_dataframes = []
+    csvs = glob.glob(f"{work_dir}/{reader_name.replace('_', '')}_chunk*_worker*_thread*_*_*.csv")
+    for file in csvs:
+        df = process_csv(file)
+        all_dataframes.append(df)
+
+    if not all_dataframes:
+        return
+
+    combined_df = pd.concat(all_dataframes, ignore_index=True)
+
+    # Sort the DataFrame
+    # Make sure the "original" area always comes first
+    combined_df["sort_priority"] = np.where(combined_df["Area"].str.contains("original"), 0, 1)
+    sorted_df = combined_df.sort_values(by=["sort_priority", "Area", "Resampling Algorithm",
+                                            "Dask Array Chunk Size (MB)", "Dask Num Workers", "Omp Num Threads"])
+
+    sorted_df.reset_index(drop=True, inplace=True)
+
+    return sorted_df
+
+
+def draw_hbar(dataframe, title):
+    """Plot the bar chart with matplotlib."""
+    colors = FIGURES[title]["colors"]
+    key_x = "Chunk size - Num workers - Num Threads"
+    key_y = FIGURES[title]["key_y"]
+
+    dpi = 100
+    fig_width = 1080 / dpi
+    # Dynamic height according to the dataframe
+    num_bars = len(dataframe)
+    fig_height = max(600, 100 + 50 * num_bars) / dpi
+    fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=dpi)
+    plt.subplots_adjust(left=0.15, right=0.85, top=0.9, bottom=0.1)
+
+    dataframe.plot.barh(x=key_x, y=key_y, legend="Memory" in title,
+                        ax=ax, width=0.5, color=colors, stacked="Memory" in title)
+    plt.title(title, fontsize=16)
+    plt.ylabel(key_x, fontsize=14)
+    plt.xlabel("Memory (GB)" if "Memory" in title else key_y, fontsize=14)
+    ax.tick_params(axis="both", labelsize=12)
+    if "Memory" in title:
+        ax.legend(loc="upper right")
+        # Mark the position of the physical memory limit
+        physical_memory = psutil.virtual_memory().total // (1024 ** 3)
+        ax.axvline(x=physical_memory, color="#808080", linestyle="--")
+        ax.text(physical_memory + 0.5, 1, "Physical\nMemory\nLimit", color="#808080")
+    if "CPU" in title:
+        ax.set_xlim([0, 100])
+
+    # Data labels to the right of the bars
+    cumulative_widths = [0] * len(dataframe)
+    for i, container in enumerate(ax.containers):
+        for j, bar in enumerate(container):
+            width = bar.get_width()
+            cumulative_widths[j] = (cumulative_widths[j] + width) if "Memory" in title else width
+            label_x_pos = cumulative_widths[j] + 0.3
+
+            if i == 0:
+                # For "Time", "CPU" and "Avg Memory"
+                label_text = str(round(width, 2))
+            else:
+                # For "Max Memory"
+                # In the graph dataframe this is actually the difference between Max and Avg
+                # so that the "stacked" bars are drawn correctly.
+                # Restore the value to the real Max when writing the label.
+                label_text = str(round(cumulative_widths[j], 2))
+
+            ax.text(label_x_pos, bar.get_y() + bar.get_height() / 2, label_text, va="center")
+
+    svg = BytesIO()
+    plt.savefig(svg, format="svg")
+    svg = svg.getvalue().decode("utf-8")
+    plt.close()
+
+    return svg
+
+
+def html_head(reader_name):
+    """Generate the HTML head of the report."""
+    import dask
+    import pyresample
+    import pyspectral
+    import xarray as xr
+
+    import satpy
+
+    # Get system info
+    cpu_core = psutil.cpu_count(logical=False)
+    cpu_thread = psutil.cpu_count(logical=True)
+    cpu_info = cpuinfo.get_cpu_info()
+    cpu_model = cpu_info["brand_raw"]
+    memory_info = psutil.virtual_memory().total // (1024 ** 3)
+    os_info = platform.platform()
+
+    # Get Python env
+    python_version = platform.python_version()
+    numpy_version = np.__version__
+    dask_version = dask.__version__
+    xarray_version = xr.__version__
+    satpy_version = satpy.__version__
+    pyresample_version = pyresample.__version__
+    pyspectral_version = pyspectral.__version__
+    psutil_version = psutil.__version__
+
+    html_content = f"""
+    <html>
+    <head>
+    <meta charset="UTF-8">
+    <title>Satpy Performance Test Report for {reader_name}</title>
+    </head>
+    <body>
+    <h1>Satpy Performance Test Report for {reader_name}</h1>
+    <p>Generation Date: UTC {datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")}</p>
+    <h2>1. System Info</h2>
+    <h3>1.1 Platform</h3>
+    <p>CPU: {cpu_model}, {cpu_core} cores / {cpu_thread} threads in total</p>
+    <p>Physical Memory: {memory_info} GB</p>
+    <p>OS: {os_info}</p>
+    <h3>1.2 Python Environment</h3>
+    <p>Python: {python_version}</p>
+    <p>Numpy: {numpy_version}</p>
+    <p>Dask: {dask_version}</p>
+    <p>Xarray: {xarray_version}</p>
+    <p>Satpy: {satpy_version}</p>
+    <p>Pyresample: {pyresample_version}</p>
+    <p>Pyspectral: {pyspectral_version}</p>
+    <p>Psutil: {psutil_version}</p>
+    <h2>2. Test Results</h2>
+    """
+
+    return html_content
+
+
+def html_report(work_dir, reader_name):
+    """Analyze the summary dataframe and produce an HTML report."""
+    df = combined_csv(work_dir, reader_name)
+    if df is None:
+        print("Test CSV result not found! Or its filename doesn't fit [*_chunk*_worker*_thread*_*_*.csv]") # noqa
+        return
+    # Group the dataframe for the report
+    df["Group"] = "Area: " + df["Area"] + " - " + "Resampler: " + df["Resampling Algorithm"]
+    groups = df["Group"].unique()
+
+    html_content = html_head(reader_name)
+
+    for group in groups:
+        group_df = df[df["Group"] == group]
+        # Drop unnecessary columns
+        group_df_table = group_df.drop(["Group", "Area", "Resampling Algorithm", "sort_priority"],
+                                       axis=1, inplace=False)
+
+        group_df_graph = group_df.copy()
+        # Build a combined key column to make plotting the chart easier
+        group_df_graph["Chunk size - Num workers - Num Threads"] = (
+                group_df_graph["Dask Array Chunk Size (MB)"].astype(str) + " - " +
+                group_df_graph["Dask Num Workers"].astype(str) + " - " +
+                group_df_graph["Omp Num Threads"].astype(str))
+        group_df_graph = group_df_graph.sort_values(by=["Dask Array Chunk Size (MB)", "Dask Num Workers",
+                                                        "Omp Num Threads"], ascending=False)
+        # For the stacked bar: store Max as the difference between Max and Avg
+        group_df_graph["Max Memory (GB)"] = group_df_graph["Max Memory (GB)"] - group_df_graph["Avg Memory (GB)"]
+        # Zero out the rows that ended in errors so the failed rounds stand out in the chart
+        group_df_graph.loc[group_df_graph["Errors"] != "N/A", ["Time (s)", "Avg CPU (%)",
+                                                               "Avg Memory (GB)", "Max Memory (GB)"]] = 0
+
+        group_html = group_df_table.to_html(index=False)
+        html_content += f"""

+        <h2>2.{groups.tolist().index(group) + 1} {group}</h2>
+        <h3>2.{groups.tolist().index(group) + 1}.1 Table</h3>
+        {group_html}
+        <h3>2.{groups.tolist().index(group) + 1}.2 Charts</h3>
+        """
+
+        # Plot three charts: time, cpu and mem
+        for title in FIGURES.keys():
+            svg_bar = draw_hbar(group_df_graph, title)
+            html_content += f"""
+            <div>
+            {svg_bar}
+            </div>
+            """
+
+    # Finish HTML report
+    html_content += """
+    </body>
+    </html>
+    """
+
+    # Save it to disk
+    with open(f"{work_dir}/{reader_name}_test_report.html", "w", encoding="utf-8") as f:
+        f.write(html_content)
diff --git a/doc/rtd_environment.yml b/doc/rtd_environment.yml
index abd8add616..94c5567ae3 100644
--- a/doc/rtd_environment.yml
+++ b/doc/rtd_environment.yml
@@ -12,14 +12,18 @@ dependencies:
   # 2.19.1 seems to cause library linking issues
   - eccodes>=2.20
   - graphviz
+  - matplotlib-base
   - numba
   - numpy
+  - pandas
   - pillow
   - pooch
+  - psutil
   - pyresample
   - pytest
   - python-eccodes
   - python-geotiepoints
+  - py-cpuinfo
   - rasterio
   - rioxarray
   - setuptools
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 62156e9760..e808c8bcfa 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -129,6 +129,9 @@ def __getattr__(cls, name):
     "--private",
 ]
 
+# Additional path for the 'performance_test' API docs
+sys.path.insert(0, os.path.abspath("../../benchmarks/"))
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
 
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 66a069fcda..296b931d9c 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -72,11 +72,13 @@ Documentation
    multiscene
    dev_guide/index
 
+
 .. toctree::
    :maxdepth: 1
 
    Satpy API <api/satpy>
    faq
+   performance_test
    Release Notes <https://github.com/pytroll/satpy/blob/main/CHANGELOG.md>
    Security Policy <https://github.com/pytroll/satpy/blob/main/SECURITY.md>
diff --git a/doc/source/performance_test.rst b/doc/source/performance_test.rst
new file mode 100644
index 0000000000..9104620de2
--- /dev/null
+++ b/doc/source/performance_test.rst
@@ -0,0 +1,149 @@
+================
+Performance Test
+================
+
+For better performance tweaks on specific readers, a tool ``performance_test`` under ``benchmarks`` is introduced
+here. It involves ``DASK_ARRAY__CHUNK_SIZE``, ``DASK_NUM_WORKERS``, ``OMP_NUM_THREADS`` and other options mentioned
+in :doc:`FAQ <../faq>`. The tool loops through the test conditions defined by these values and finally gives an HTML
+report with tables and charts for easier reading. It offers two types of tests: ``simple_test`` and
+``resampler_test``.
+
+
+How does it work?
+=================
+- The core is just a regular satpy script -- find the datasets, load the composite, resample it if needed and
+  save it as GeoTIFF.
+- A monitor thread based on ``psutil`` records the CPU and memory usage while the script runs, at a sample rate of
+  around 0.5 seconds. Any errors during the test are also recorded.
+- Each round tests one single condition and stores its result in a csv file. After that, the machine takes a 1-min
+  rest to let the CPU cool down.
+- After all the tests have finished, the tool collects all the result csv files and summarizes and visualizes them
+  in an HTML report.
+
+
+Preparations
+============
+1. Additional packages required
+-------------------------------
+- **psutil:** Records CPU/memory usage.
+- **pandas:** Analyzes the test results.
+- **matplotlib:** (Either ``matplotlib`` or ``matplotlib-base``) Plots the charts for the report.
+- **py-cpuinfo:** Gets the CPU model for the report.
+
+
+2. Choose the composite and get corresponding datasets
+------------------------------------------------------
+Usually the composite should be one that involves a lot of computation, like atmospheric correction. For most
+earth-observing satellites this could be ``true_color`` or something similar.
+
+Although one scene is enough to run the test, 3-5 scenes are better for a meaningful average.
+
+- For geostationary satellites, it is recommended to pick scenes around **solar noon** in **full-disk** scan mode.
+- For polar-orbiting satellites, the scenes should cover roughly the **same area** so the computational load is
+  similar.
+
+
+3. Organize the datasets
+------------------------
+One scene per folder. All the dataset folders should follow the same naming pattern, e.g.:
+
+.. code-block:: batch
+
+    2024/06/29  09:06    <DIR>    G16_s20241691700214_e20241691709522_FLDK
+    2024/06/29  09:06    <DIR>    G16_s20241701700215_e20241701709523_FLDK
+    2024/06/29  09:06    <DIR>    G16_s20241711700217_e20241711709525_FLDK
+    2024/06/29  09:06    <DIR>    G16_s20241721700219_e20241721709527_FLDK
+    2024/06/29  09:06    <DIR>    G16_s20241731700220_e20241731709528_FLDK
+
+
+4. Do I have enough swap memory?
+--------------------------------
+Some conditions or resamplers may use up the physical memory and then eat deep into swap. When both are at their
+limits, the OS may simply kill the test process without any warnings or errors recorded.
+
+
+5. Arrange your time and work
+-----------------------------
+The whole test could last for hours, depending on the reader, the datasets and the conditions.
+Keep the machine free during this period and avoid unnecessary background jobs like software updates.
+
+
+Usage
+=====
+.. note::
+
+    Both ``simple_test`` and ``resampler_test`` collect all the results under ``work_dir`` and produce the report
+    in the same format. So if you already have results from previous tests, just keep them in the same directory
+    and the test will merge them into one and refresh the report automatically.
+
+Initialize
+----------
+.. autofunction:: performance_test.SatpyPerformanceTest.__init__
+
+.. code-block:: python
+
+    import os
+
+    from performance_test import SatpyPerformanceTest
+
+    tester = SatpyPerformanceTest(work_dir="C:/Users/ABC/Downloads/Sat/Geo/ABI perf test",
+                                  folder_pattern="G16_s*_e*_FLDK",
+                                  reader_name="abi_l1b",
+                                  composite="true_color",
+                                  chunk_size_opts=[16, 64],
+                                  worker_opts=[8, 16])
+
+simple_test
+-----------
+.. autofunction:: performance_test.SatpyPerformanceTest.simple_test
+
+.. code-block:: python
+
+    # You can set satpy-related environment variables before running the test.
+    os.environ["PSP_CONFIG_FILE"] = "D:/satpy_config/pyspectral/pyspectral.yaml"
+
+    tester.simple_test(diff_res=True)
+
+resampler_test
+--------------
+.. autofunction:: performance_test.SatpyPerformanceTest.resampler_test
+
+.. code-block:: python
+
+    from pyresample.geometry import AreaDefinition
+
+    proj = "+proj=lcc +lon_0=-96 +lat_1=20 +lat_2=60 +datum=WGS84 +ellps=WGS84"
+    width = 8008
+    height = 8008
+    area_extent = (-106000, 2635000, 3898000, 6639000)
+
+    area_def = AreaDefinition(area_id="NorthAmerica", proj_id="lcc", description="na",
+                              projection=proj, width=width, height=height, area_extent=area_extent)
+    tester.resampler_test(resamplers=["bilinear", "ewa"],
+                          area_def=area_def,
+                          resampler_kwargs={
+                              "bilinear": {"cache_dir": "C:/Users/ABC/Downloads/Sat/Geo/ABI perf test/cache"},
+                              "ewa": {"weight_delta_max": 40, "weight_distance_max": 2},
+                          })
+
+.. note::
+
+    When you test the ``bilinear`` or ``nearest`` resampler on geostationary datasets and want to both speed up the
+    test and exclude the impact of the resampling cache, it is recommended to pre-build the cache with just one
+    scene and one condition (see the sketch below). That also gives you a chance to see how big the difference
+    between cached and uncached runs is -- sometimes it is huge, especially for ``bilinear``.
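+
+A minimal warm-up sketch for that cache pre-build could look like the following. The single-scene
+``folder_pattern`` below is hypothetical -- point it at one of your own dataset folders -- and ``area_def`` is the
+area defined in the example above:
+
+.. code-block:: python
+
+    # Warm-up run: one scene, one condition, only to populate the resampling cache.
+    warmup = SatpyPerformanceTest(work_dir="C:/Users/ABC/Downloads/Sat/Geo/ABI perf test",
+                                  folder_pattern="G16_s20241691700214*_FLDK",  # matches a single scene
+                                  reader_name="abi_l1b",
+                                  composite="true_color",
+                                  chunk_size_opts=[64],
+                                  worker_opts=[8])
+    warmup.resampler_test(resamplers="bilinear",
+                          area_def=area_def,
+                          resampler_kwargs={"cache_dir": "C:/Users/ABC/Downloads/Sat/Geo/ABI perf test/cache"})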
+
+How to test ``OMP_NUM_THREADS``?
+--------------------------------
+``OMP_NUM_THREADS`` has to be set outside the Python script. On **Linux**, you can set it temporarily like this:
+
+.. code-block:: shell
+
+    OMP_NUM_THREADS=4 python your_test_script.py
+
+On **Windows**:
+
+.. code-block:: batch
+
+    set OMP_NUM_THREADS=4 && python your_test_script.py
+
+You can also choose not to set it; normally the program will then use as many logical cores as are available.
+Either way, the test will pick up the correct value and pass it to the report, as sketched below.
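+
+For reference, the tester resolves the thread count with a simple fallback -- essentially the logic below, which
+mirrors what ``single_loop`` does internally:
+
+.. code-block:: python
+
+    import os
+
+    import psutil
+
+    # Prefer OMP_NUM_THREADS if it is set; otherwise assume all logical cores are used.
+    try:
+        num_thread = os.environ["OMP_NUM_THREADS"]
+    except KeyError:
+        num_thread = psutil.cpu_count(logical=True)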