diff --git a/.github/workflows/deploy-image.yml b/.github/workflows/deploy-image.yml new file mode 100644 index 000000000..81c31c7ad --- /dev/null +++ b/.github/workflows/deploy-image.yml @@ -0,0 +1,41 @@ +name: Create and publish a Docker image + +on: + push: + branches: ['master'] + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..0a5e34764 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,56 @@ +FROM continuumio/miniconda3 + +LABEL Description="ShapePipe Docker Image" +ENV SHELL /bin/bash + +ARG CC=gcc-9 +ARG CXX=g++-9 + +# gcc < 10 is required to compile ww +ENV CC=gcc-9 +ENV CXX=g++-9 + +RUN apt-get update --allow-releaseinfo-change && \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install apt-utils -y && \ + apt-get install make -y && \ + apt-get install automake -y && \ + apt-get install autoconf -y && \ + apt-get install gcc-9 g++-9 -y && \ + apt-get install locales -y && \ + apt-get install libgl1-mesa-glx -y && \ + apt-get install xterm -y && \ + apt-get install cmake protobuf-compiler -y && \ + apt-get install libtool libtool-bin libtool-doc -y && \ + apt-get install libfftw3-bin libfftw3-dev -y && \ + apt-get install libatlas-base-dev liblapack-dev libblas-dev -y && \ + apt-get install vim -y && \ + apt-get install locate -y && \ + apt-get install curl -y && \ + apt-get install acl -y && \ + apt-get install sssd -y && \ + apt-get clean + +ADD nsswitch.conf /etc/ + +RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && \ + locale-gen +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +SHELL ["/bin/bash", "--login", "-c"] + +COPY ./environment.yml ./ +COPY install_shapepipe README.rst setup.py setup.cfg ./ +RUN touch ./README.md + +RUN conda update -n base -c defaults conda -c defaults +RUN conda env create --file environment.yml + +COPY shapepipe ./shapepipe +COPY scripts ./scripts + +RUN source activate shapepipe +#RUN pip install jupyter diff --git a/auxdir/CFIS/tiles_202106/tiles_P7.txt b/auxdir/CFIS/tiles_202106/tiles_P7.txt index 535084a2e..cd8b6b5af 100644 --- a/auxdir/CFIS/tiles_202106/tiles_P7.txt +++ b/auxdir/CFIS/tiles_202106/tiles_P7.txt @@ -1,3 +1,4 @@ +000.000 052.332 053.331 053.332 diff --git a/docs/source/canfar.md b/docs/source/canfar.md new file mode 100644 index 000000000..1f490c504 --- /dev/null +++ b/docs/source/canfar.md @@ -0,0 +1,94 @@ +# Running `shapepipe` on the canfar science portal + +## Introduction + +## Steps from testing to parallel running + +Before starting a batch remote session job on a large number of images (step 5.), +it is recommended to perform some or all of the testing steps (1. - 4.). + + +1. 
Run the basic `shapepipe` runner script to test one or several modules, specified by a given config file, on one image.
   This step has to be run in the image run directory. The command is
   ```bash
   shapepipe_run -c config.ini
   ```

2. Run the job script to test the job management, on one image.
   This step has to be run in the image run directory. The command is
   ```bash
   job_sp_canfar -j JOB [OPTIONS]
   ```

3. Run the pipeline script to test the processing step(s), on one image.
   This step has to be run in the patch base directory.

   1. First, run in dry mode:
      ```bash
      init_run_exclusive_canfar.sh -j JOB -e ID -p [psfex|mccd] -k [tile|exp] -n
      ```
   2. Next, perform a real run, without the dry-run flag `-n`:
      ```bash
      init_run_exclusive_canfar.sh -j JOB -e ID -p [psfex|mccd] -k [tile|exp]
      ```

4. Run the remote session script to test job submission using docker images, on one image.
   This step has to be run in the patch base directory.
   1. First, run in dry mode=2, to display the curl command:
      ```bash
      curl_canfar_local.sh -j JOB -e ID -p [psfex|mccd] -k [tile|exp] -n 2
      ```
   2. Next, run in dry mode=1, to call curl without processing:
      ```bash
      curl_canfar_local.sh -j JOB -e ID -p [psfex|mccd] -k [tile|exp] -n 1
      ```
   3. Then, perform a real run, calling curl with processing:
      ```bash
      curl_canfar_local.sh -j JOB -e ID -p [psfex|mccd] -k [tile|exp]
      ```

5. Full run: call the remote session script and docker image on a collection of images:
   ```bash
   curl_canfar_local.sh -j JOB -f path_IDs -p [psfex|mccd] -k [tile|exp]
   ```
   with `path_IDs` being a text file with one image ID per line.

## Monitoring

### Status and output of submitted jobs

The currently active remote sessions can be monitored via the session IDs in the file `session_IDs.txt`, written by the
remote session script `curl_canfar_local.sh`. In the patch main directory, run
```bash
curl_canfar_monitor.sh events
```
to display the status of the remotely started docker images, and
```bash
curl_canfar_monitor.sh logs
```
to print `stdout` of the remotely run pipeline script.

### Number of running jobs

The script
```bash
stats_headless_canfar.py
```
returns the number of actively running headless jobs.


## Post-hoc summary

In the patch main directory, run
```bash
summary_run PATCH
```
to print a summary with missing image IDs per job and module.
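As an end-to-end illustration, here is a minimal sketch that chains a full submission with the monitoring commands above; the job number `16` (tile detection) and the `psfex` PSF model are assumptions for this example:
```bash
# Submit one headless session per image ID listed in tile_numbers.txt
curl_canfar_local.sh -j 16 -f tile_numbers.txt -k tile -p psfex

# Follow the status of the launched sessions
curl_canfar_monitor.sh events

# Count the headless jobs that are currently running
stats_headless_canfar.py
```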
+
+## Deleting jobs
+
+```bash
+for id in `cat session_IDs.txt`; do echo $id; curl -X DELETE -E /arc/home/kilbinger/.ssl/cadcproxy.pem https://ws-uv.canfar.net/skaha/v0/session/$id; done
+```
diff --git a/docs/source/pipeline_canfar.md b/docs/source/pipeline_canfar.md
new file mode 100644
index 000000000..36a1a2025
--- /dev/null
+++ b/docs/source/pipeline_canfar.md
@@ -0,0 +1,95 @@
+patch="P7"
+psf="psfex"
+N_SMP=16
+
+# Terminal title
+echo -ne "\033]0;$patch\007"
+
+# Run directory
+dir=~/cosmostat/v2/pre_v2/$psf/$patch
+cd $dir
+
+# Get tile number list
+ln -s ~/shapepipe/auxdir/CFIS/tiles_202106/tiles_$patch.txt tile_numbers.txt
+
+
+# Get images
+
+## Download and link separately
+
+### Download
+### Create and link to central image storage directory
+mkdir -p ~/cosmostat/v2/data_tiles/$patch
+ln -s ~/cosmostat/v2/data_tiles/$patch data_tiles
+
+### Download and move tiles
+ln -s ~/shapepipe/example/cfis
+mkdir -p output
+export SP_RUN=`pwd`
+
+shapepipe_run -c cfis/config_Git_vos.ini
+mv -i output/run_sp_Git_*/get_images_runner/output/CFIS.???.???.*fits* data_tiles
+rm -rf output/run_sp_tiles_Git_*
+update_run_log_file.py
+# repeat the above block
+
+### Find exposures; this run can be stopped after Fe
+shapepipe_run -c cfis/config_GitFe_symlink.ini
+
+### Download and move exposures
+
+shapepipe_run -c cfis/config_Gie_vos.ini
+mv -i output/run_sp_Gie_*/get_images_runner/output/*.fits*fz data_exp
+rm -rf output/run_sp_Gie_*
+update_run_log_file.py
+# repeat the above
+
+### Create links (and re-run Fe, not necessary)
+job_sp_canfar.bash -p $psf `cat tile_numbers.txt` -j 1 -r symlink
+
+# Uncompress weights, split exposures into single HDUs
+job_sp_canfar.bash -p $psf -n $N_SMP -j 2
+
+# Mask tiles
+job_sp_canfar.bash -p $psf -n $N_SMP -j 4
+
+# If not finished:
+combine_runs.bash -p psfex -c flag
+mv output/run_sp_combined_flag output/run_sp_exp_Ma
+
+# Mask exposures
+job_sp_canfar.bash -p $psf -n $N_SMP -j 8
+
+
+# Tile detection
+curl_canfar_local.sh -j 16 -f tile_numbers.txt -k tile -p $psf -N $N_SMP
+
+
+# Exposure detection
+## Get single-HDU single-exposure IDs
+~/shapepipe/scripts/python/summary_run.py
+
+cp summary/missing_job_32_sextractor.txt all.txt
+curl_canfar_local.sh -j 32 -f all.txt -k exp -p $psf -N $N_SMP
+
+# Tile preparation
+curl_canfar_local.sh -j 64 -f tile_numbers.txt -k tile -p $psf -N $N_SMP
+
+# Tile shape measurement
+curl_canfar_local.sh -j 128 -f tile_numbers.txt -k tile -p $psf -N 8
+
+# Merge subcatalogues, and create final cat
+curl_canfar_local.sh -j 256 -f tile_numbers.txt -k tile -p $psf -N $N_SMP
+
+# Combine all final cats in common output dir as links
+combine_runs.bash -c final -p psfex
+
+# Merge all final cats
+# (W3: 140GB RAM)
+merge_final_cat -i output/run_sp_combined_final/make_catalog_runner/output -p cfis/final_cat.param -v
+
+
+# Delete jobs
+SSL=~/.ssl/cadcproxy.pem
+SESSION=https://ws-uv.canfar.net/skaha/v0/session
+for ID in `cat session_IDs.txt`; do echo $ID; curl -X DELETE -E $SSL $SESSION/$ID; done
diff --git a/docs/source/post_processing.md b/docs/source/post_processing.md
index ebd123889..8d9922dbb 100644
--- a/docs/source/post_processing.md
+++ b/docs/source/post_processing.md
@@ -3,63 +3,14 @@
 This page shows all required steps of post-processing the results from one or
 more `ShapePipe` runs. Post-processing combines various individual `ShapePipe`
 output files, and creates joint results, for example combining individual tile
-catalogues in a large sky area. The output of post-processing is a joint _shape
+catalogues into a large sky area. The output of post-processing is a joint _shape
 catalogue_, containing all required information to create a calibrated shear
 catalogue via _metacalibration_, a joint star catalogue, and PSF diagnostic
 plots.
 
-Some of the following steps pertain specifically to runs carried out on [canfar](https://www.canfar.net/en),
-but most are general.
+If the main ShapePipe processing was carried out on the old canfar VM system (e.g. for CFIS v0 and v1), see
+[here](vos_retrieve.md) for details on how to retrieve the ShapePipe output files.
 
-1. Retrieve `ShapePipe` result files
-
-   For a local run on the same machine as for post-processing, nothing needs to be done.
-   In some cases, the run was carried out on a remote machine or cluster, and the resulting `ShapePipe`
-   output files need to be retrieved.
-
-   In the specific case of canfar_avail_results.py, this is done as follows.
-
-   A. Check availability of results
-
-   A `canfar` job can submit a large number of tiles, whose processing time can vary a lot.
-   We assume that the submitted tile ID list is available locally via the ascii file `tile_numbers.txt`.
-   To check which tiles have finished running, and whose results have been uploaded, use
-   ```bash
-   canfar_avail_results -i tile_numbers.txt -v -p PSF --input_path INPUT_PATH
-   ```
-   where PSF is one in [`psfex`|`mccd`], and INPUT_PATH the input path on vos, default `vos:cfis/cosmostat/kilbinger/results`.
-   See `-h` for all options.
-
-   B. Download results
-
-   All results files will be downloaded with
-   ```bash
-   canfar_download_results -i tile_numbers.txt -v -p PSF --input_vos INPUT_VOS
-   ```
-   Use the same options as for same as for `canfar_avail_results`.
-
-   This command can be run in the same directory at subsequent times, to complete an ongoing run: Only newer files will be downloaded
-   from the `vos` directory. This also assures that partially downloaded or corrupt files will be replaced.
-
-   Checking the `vos` directorty can be slow for large patches.
-   To only download files that are not yet present locally (in `.`), first write the missing ones to an ascii file, using again the
-   script `canfar_avail_results`, but this time with `.` as input path:
-   ```bash
-   canfar_avail_results -i tile_numbers.txt --input_path . -p PSF -v -o missing.txt
-   '''
-   Then, download only the missing files with
-   ```bash
-   canfar_download_results -i missing.txt --input_vos cosmostat/kilbinger/results_mccd_oc2 -p mccd -v
-   ```
-
-   C. Un-tar results
-   ```bash
-   untar_results -p PSF
-   ```
-   On success, `ShapePipe` output `fits` and `log` files will be now in various subdirs of the `output` directory.
-
-At this step all required `ShapePipe` resulting output files are available in the current working directory.
-
-2. Optional: Split output in sub-samples
+1. Optional: Split output into sub-samples
 
    An optional intermediate step is to create directories for sub-samples, for example one directory
   for each patch on the sky. This will create symbolic links to the results `.tgz` files downloaded in
@@ -70,33 +21,34 @@ At this step all required `ShapePipe` resulting output files are available in th
   ```
   The following steps will then be done in the directory `tiles_W3`.
 
-3. Run PSF diagnostics, create merged catalogue
+2. Run PSF diagnostics, create merged catalogue
 
   Type
   ```bash
   post_proc_sp -p PSF
   ```
   to automatically perform a number of post-processing steps.
Choose the PSF model with the option
  `-p psfex|mccd`. In detail, these are (and can also be done individually by hand):

-   A. Analyse psf validation files
+   1. Analyse psf validation files

     ```bash
-     prepare_star_cat -p PSF
+     combine_runs -t psf -p PSF
     ```
     with options as for `post_proc_sp`.
-     This script identifies all psf validation files (from all processed tiles downloaded to `pwd`), creates symbolic links,
-     merges the catalogues, and creates plots of PSF ellipticity, size, and residuals over the focal plane.
+     This script creates a new combined psf run in the ShapePipe `output` directory, by identifying all psf validation files
+     and creating symbolic links. The run log file is updated.

-   B. Create plots of the PSF and their residuals in the focal plane, as a diagnostic of the overall PSF model.
-      As a scale-dependend test, which propagates directly to the shear correlation function, the rho statistics are computed,
-      see {cite:p}`rowe:10` and {cite:p}`jarvis:16`,
+   2. Merge individual psf validation files into one catalogue. Create plots of the PSF and their residuals in the focal plane,
+      as a diagnostic of the overall PSF model.
+      As a scale-dependent test, which propagates directly to the shear correlation function, the rho statistics are computed,
+      see {cite:p}`rowe:10` and {cite:p}`jarvis:16`,
     ```bash
     shapepipe_run -c /path/to/shapepipe/example/cfis/config_MsPl_PSF.ini
     ```

-   C. Prepare output directory
+   3. Prepare output directory

     Create links to all 'final_cat' result files with
     ```bash
@@ -105,7 +57,7 @@ ...
   The corresponding output directory that is created is `output/run_sp_combined/make_catalog_runner/output`.
   On success, it contains links to all `final_cat` output catalogues

-   D. Merge final output files
+   4. Merge final output files

   Create a single main shape catalog:
   ```bash
diff --git a/docs/source/vos_retrieve.md b/docs/source/vos_retrieve.md
new file mode 100644
index 000000000..3606752c8
--- /dev/null
+++ b/docs/source/vos_retrieve.md
@@ -0,0 +1,51 @@
+# Retrieve files from VOspace
+
+This page describes how ShapePipe output files can be retrieved via the Virtual Observatory Space
+on canfar. This system was used for the CFIS v0 and v1 runs, and is now obsolete.
+
+1. Retrieve ShapePipe result files
+
+   For a local run on the same machine as for post-processing, nothing needs to be done. In some cases, the run was carried out on a remote machine or cluster, and the resulting ShapePipe output files
+   need to be retrieved.
+
+   This is done with the script `canfar_avail_results.py`, as follows.
+
+   1. Check availability of results
+
+      A canfar job can submit a large number of tiles, whose processing time can vary a lot. We assume that the submitted tile ID list is available locally via the ascii file `tile_numbers.txt`. To check
+      which tiles have finished running, and whose results have been uploaded, use
+      ```bash
+      canfar_avail_results -i tile_numbers.txt -v -p PSF --input_path INPUT_PATH
+      ```
+      where PSF is one in [`psfex`|`mccd`], and INPUT_PATH the input path on vos, default `vos:cfis/cosmostat/kilbinger/results`.
+      See `-h` for all options.
+
+   2. Download results
+
+      All results files will be downloaded with
+      ```bash
+      canfar_download_results -i tile_numbers.txt -v -p PSF --input_vos INPUT_VOS
+      ```
+      Use the same options as for `canfar_avail_results`.
+
+      This command can be run repeatedly in the same directory at later times, to complete an ongoing run: only newer files will be downloaded
+      from the `vos` directory. This also ensures that partially downloaded or corrupt files will be replaced.
+
+      Checking the `vos` directory can be slow for large patches.
+      To only download files that are not yet present locally (in `.`), first write the missing ones to an ascii file, using again the
+      script `canfar_avail_results`, but this time with `.` as input path:
+      ```bash
+      canfar_avail_results -i tile_numbers.txt --input_path . -p PSF -v -o missing.txt
+      ```
+      Then, download only the missing files with
+      ```bash
+      canfar_download_results -i missing.txt --input_vos cosmostat/kilbinger/results_mccd_oc2 -p mccd -v
+      ```
+
+   3. Un-tar results
+      ```bash
+      untar_results -p PSF
+      ```
+      On success, `ShapePipe` output `fits` and `log` files will now be in various subdirectories of the `output` directory.
+
+At this point, all required `ShapePipe` output files are available in the current working directory.
diff --git a/environment.yml b/environment.yml
index 9279402ef..548c7e618 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,30 +4,30 @@ channels:
 dependencies:
   - python=3.9
   - pip>=21.2.4
-  - numpy==1.21.6
-  - astropy==5.0
-  - automake==1.16.2
-  - autoconf==2.69
-  - cmake==3.19.6
-  - galsim==2.2.5
-  - joblib==1.1.0
-  - libtool==2.4.6
-  - matplotlib==3.5.1
-  - numba==0.54.1
-  - pandas==1.4.1
+  - astromatic-psfex==3.21.1
+  - astromatic-source-extractor==2.25.0
+  - astropy==5.1
+  - automake
+  - autoconf
+  - cmake
+  - galsim
+  - joblib
+  - libtool
+  - matplotlib
+  - numba
+  - numpy==1.22
   - pip:
-    - cs_util==0.0.5
-    - mccd==1.2.3
-    - modopt==1.6.0
-    - PyQt5==5.15.6
-    - pyqtgraph==0.12.4
-    - python-pysap==0.0.6
-    - reproject==0.8
-    - sip_tpv==1.1
-    - sf_tools==2.0.4
-    - sqlitedict==2.0.0
-    - termcolor==1.1.0
-    - tqdm==4.63.0
-    - treecorr==4.2.6
+    - cs_util
+    - modopt
+    - PyQt5
+    - pyqtgraph
+    - reproject
+    - skaha
+    - sip_tpv
+    - sf_tools
+    - sqlitedict
+    - termcolor
+    - tqdm
+    - treecorr
     - git+https://github.com/aguinot/ngmix@stable_version
     - git+https://github.com/tobias-liaudat/Stile@v0.1
diff --git a/example/cfis/config_Gie_vos.ini b/example/cfis/config_Gie_vos.ini
new file mode 100644
index 000000000..ee4b92f30
--- /dev/null
+++ b/example/cfis/config_Gie_vos.ini
@@ -0,0 +1,103 @@
+# ShapePipe configuration file for: get images
+
+
+## Default ShapePipe options
+[DEFAULT]
+
+# verbose mode (optional), default: True, print messages on terminal
+VERBOSE = False
+
+# Name of run (optional) default: shapepipe_run
+RUN_NAME = run_sp_Gie
+
+# Add date and time to RUN_NAME, optional, default: False
+RUN_DATETIME = True
+
+
+## ShapePipe execution options
+[EXECUTION]
+
+# Module name, single string or comma-separated list of valid module runner names
+MODULE = get_images_runner
+
+# Parallel processing mode, SMP or MPI
+MODE = SMP
+
+
+## ShapePipe file handling options
+[FILE]
+
+# Log file master name, optional, default: shapepipe
+LOG_NAME = log_sp
+
+# Runner log file name, optional, default: shapepipe_runs
+RUN_LOG_NAME = log_run_sp
+
+# Input directory, containing input files, single string or list of names
+INPUT_DIR = $SP_RUN
+
+# Output directory
+OUTPUT_DIR = $SP_RUN/output
+
+
+## ShapePipe job handling options
+[JOB]
+
+# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial
+SMP_BATCH_SIZE = 1
+
+# Timeout value (optional), default is None, i.e. no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+# Get exposures
+[GET_IMAGES_RUNNER]
+
+INPUT_DIR = last:find_exposures_runner
+
+FILE_PATTERN = exp_numbers
+
+FILE_EXT = .txt
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+
+# Paths
+
+# Output path (optional, default is [FILE]:OUTPUT_DIR)
+# OUTPUT_PATH = input_images
+
+# Input path where original images are stored. Can be local path or vos url.
+# Single string or list of strings
+INPUT_PATH = vos:cfis/pitcairn, vos:cfis/weights, vos:cfis/flags
+# LSB images:
+#INPUT_PATH = vos:cfis/lsb_individual, vos:cfis/weights, vos:cfis/flags
+
+# Input file pattern including tile number as dummy template
+INPUT_FILE_PATTERN = 000000, 000000.weight, 000000.flag
+# LSB images
+#INPUT_FILE_PATTERN = 000000s, 000000p.weight, 000000p.flag
+
+# Input file extensions
+INPUT_FILE_EXT = .fits.fz, .fits.fz, .fits.fz
+
+# Input numbering scheme, python regexp
+INPUT_NUMBERING = \d{6}
+
+# Output file pattern without number
+OUTPUT_FILE_PATTERN = image-, weight-, flag-
+
+# Method to retrieve images, one in 'vos', 'symlink'
+RETRIEVE = vos
+
+# If RETRIEVE=vos, number of attempts to download
+# Optional, default=3
+N_TRY = 3
+
+# Retrieve command options, optional
+RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem
+
+CHECK_EXISTING_DIR = $SP_RUN/data_exp
diff --git a/example/cfis/config_GitFeGie_symlink.ini b/example/cfis/config_GitFeGie_symlink.ini
index 22e834533..6f84d1482 100644
--- a/example/cfis/config_GitFeGie_symlink.ini
+++ b/example/cfis/config_GitFeGie_symlink.ini
@@ -80,6 +80,8 @@ INPUT_NUMBERING = \d{3}\.\d{3}
 # Output file pattern without number
 OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight-
 
+#CHECK_EXISTING_DIR = $SP_RUN/output/run_sp_Git/get_images_runner_run_1/output
+
 # Copy/download method, one in 'vos', 'symlink'
 RETRIEVE = symlink
 
@@ -98,8 +100,12 @@ FILE_EXT = .fits
 # NUMBERING_SCHEME (optional) string with numbering pattern for input files
 NUMBERING_SCHEME = -000-000
 
+# Column number of exposure name in FITS header
 COLNUM = 3
 
+# Prefix to remove from exposure name
+EXP_PREFIX = p
+
 # Get exposures
 [GET_IMAGES_RUNNER_RUN_2]
 
diff --git a/example/cfis/config_GitFeGie_vos.ini b/example/cfis/config_GitFeGie_vos.ini
index fd3207bb3..75044bf44 100644
--- a/example/cfis/config_GitFeGie_vos.ini
+++ b/example/cfis/config_GitFeGie_vos.ini
@@ -66,7 +66,7 @@ NUMBERING_SCHEME =
 # Input path where original images are stored. Can be local path or vos url.
# Single string or list of strings -INPUT_PATH = vos:cfis/tiles_DR3, vos:cfis/tiles_DR3 +INPUT_PATH = vos:cfis/tiles_DR5, vos:cfis/tiles_DR5 # Input file pattern including tile number as dummy template INPUT_FILE_PATTERN = CFIS.000.000.r, CFIS.000.000.r.weight @@ -84,8 +84,9 @@ OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- RETRIEVE = vos # Copy command options, optional -RETRIEVE_OPTIONS = --certfile=$VM_HOME/.ssl/cadcproxy.pem +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem +CHECK_EXISTING_DIR = $SP_RUN/output/run_sp_Git/get_images_runner_run_1/output [FIND_EXPOSURES_RUNNER] @@ -98,6 +99,12 @@ FILE_EXT = .fits # NUMBERING_SCHEME (optional) string with numbering pattern for input files NUMBERING_SCHEME = -000-000 +# Column number of exposure name in FITS header +COLNUM = 3 + +# Prefix to remove from exposure name +EXP_PREFIX = p + # Get exposures [GET_IMAGES_RUNNER_RUN_2] @@ -145,4 +152,4 @@ RETRIEVE = vos N_TRY = 3 # Retrieve command options, optional -RETRIEVE_OPTIONS = --certfile=$VM_HOME/.ssl/cadcproxy.pem +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem diff --git a/example/cfis/config_GitFe_symlink.ini b/example/cfis/config_GitFe_symlink.ini new file mode 100644 index 000000000..4e7ce75cb --- /dev/null +++ b/example/cfis/config_GitFe_symlink.ini @@ -0,0 +1,105 @@ +# ShapePipe configuration file for: get images and find exposures + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_GitFe + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner, find_exposures_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get tiles +[GET_IMAGES_RUNNER] + +FILE_PATTERN = tile_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = + +# Paths + +# Input path where original images are stored. Can be local path or vos url. 
+# Single string or list of strings +INPUT_PATH = $SP_RUN/data_tiles, $SP_RUN/data_tiles + +# Input file pattern including tile number as dummy template +INPUT_FILE_PATTERN = CFIS.000.000.r, CFIS.000.000.r.weight + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits.fz + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{3}\.\d{3} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- + +# Copy/download method, one in 'vos', 'symlink' +RETRIEVE = symlink + +# Copy command options, optional +RETRIEVE_OPTIONS = -L + + +[FIND_EXPOSURES_RUNNER] + +INPUT_MODULE = get_images_runner + +FILE_PATTERN = CFIS_image + +FILE_EXT = .fits + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +# Column number of exposure name in FITS header +COLNUM = 3 + +# Prefix to remove from exposure name +EXP_PREFIX = p diff --git a/example/cfis/config_Git_vos.ini b/example/cfis/config_Git_vos.ini new file mode 100644 index 000000000..26edf3efc --- /dev/null +++ b/example/cfis/config_Git_vos.ini @@ -0,0 +1,93 @@ +# ShapePipe configuration file for: get images + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_Git + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get tiles +[GET_IMAGES_RUNNER] + +FILE_PATTERN = tile_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = + +# Paths + +# Input path where original images are stored. Can be local path or vos url. 
+# Single string or list of strings +INPUT_PATH = vos:cfis/tiles_DR5, vos:cfis/tiles_DR5 + +# Input file pattern including tile number as dummy template +INPUT_FILE_PATTERN = CFIS.000.000.r, CFIS.000.000.r.weight + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits.fz + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{3}\.\d{3} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- + +# Copy/download method, one in 'vos', 'symlink' +RETRIEVE = vos + +# If RETRIEVE=vos, number of attempts to download +# Optional, default=3 +N_TRY = 3 + +# Copy command options, optional +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem + +CHECK_EXISTING_DIR = $SP_RUN/data_tiles diff --git a/example/cfis/config_MaMa_onthefly.ini b/example/cfis/config_MaMa_onthefly.ini index bde813940..84f117e65 100644 --- a/example/cfis/config_MaMa_onthefly.ini +++ b/example/cfis/config_MaMa_onthefly.ini @@ -44,7 +44,7 @@ OUTPUT_DIR = $SP_RUN/output [JOB] # Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial -SMP_BATCH_SIZE = 8 +SMP_BATCH_SIZE = 16 # Timeout value (optional), default is None, i.e. no timeout limit applied TIMEOUT = 96:00:00 diff --git a/example/cfis/config_MsPl_psfex.ini b/example/cfis/config_MsPl_psfex.ini index 7fcb3ba20..d21fdd126 100644 --- a/example/cfis/config_MsPl_psfex.ini +++ b/example/cfis/config_MsPl_psfex.ini @@ -35,7 +35,7 @@ LOG_NAME = log_sp RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names -INPUT_DIR = $SP_RUN/psf_validation_ind +INPUT_DIR = $SP_RUN/output # Output directory OUTPUT_DIR = $SP_RUN/output @@ -54,7 +54,7 @@ TIMEOUT = 96:00:00 ## Module options [MERGE_STARCAT_RUNNER] -INPUT_DIR = psf_validation_ind +INPUT_DIR = last:psfex_interp_runner PSF_MODEL = psfex diff --git a/example/cfis/config_Ms_psfex.ini b/example/cfis/config_Ms_psfex.ini new file mode 100644 index 000000000..5a105e773 --- /dev/null +++ b/example/cfis/config_Ms_psfex.ini @@ -0,0 +1,68 @@ +# ShapePipe configuration file for post-processing. +# merge star cat. + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_Ms + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = merge_starcat_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN/output + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 4 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+[MERGE_STARCAT_RUNNER]
+
+INPUT_DIR = last:psfex_interp_runner
+
+PSF_MODEL = psfex
+
+NUMBERING_SCHEME = -0000000-0
+
+# Input file pattern(s), list of strings with length matching number of expected input file types
+# Cannot contain wild cards
+FILE_PATTERN = validation_psf
+
+# FILE_EXT (optional) list of string extensions to identify input files
+FILE_EXT = .fits
diff --git a/example/cfis/config_exp_Ma_onthefly.ini b/example/cfis/config_exp_Ma_onthefly.ini
new file mode 100644
index 000000000..5f629500d
--- /dev/null
+++ b/example/cfis/config_exp_Ma_onthefly.ini
@@ -0,0 +1,76 @@
+# ShapePipe configuration file for masking of exposures
+
+
+## Default ShapePipe options
+[DEFAULT]
+
+# verbose mode (optional), default: True, print messages on terminal
+VERBOSE = True
+
+# Name of run (optional) default: shapepipe_run
+RUN_NAME = run_sp_exp_Ma
+
+# Add date and time to RUN_NAME, optional, default: False
+; RUN_DATETIME = False
+
+
+## ShapePipe execution options
+[EXECUTION]
+
+# Module name, single string or comma-separated list of valid module runner names
+MODULE = mask_runner
+
+# Parallel processing mode, SMP or MPI
+MODE = SMP
+
+
+## ShapePipe file handling options
+[FILE]
+
+# Log file master name, optional, default: shapepipe
+LOG_NAME = log_sp
+
+# Runner log file name, optional, default: shapepipe_runs
+RUN_LOG_NAME = log_run_sp
+
+# Input directory, containing input files, single string or list of names
+INPUT_DIR = .
+
+# Output directory
+OUTPUT_DIR = $SP_RUN/output
+
+
+## ShapePipe job handling options
+[JOB]
+
+# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial
+SMP_BATCH_SIZE = 16
+
+# Timeout value (optional), default is None, i.e. no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+### Mask exposures
+[MASK_RUNNER]
+
+# Parent module
+INPUT_DIR = last:split_exp_runner
+
+# Update numbering convention, accounting for HDU number of
+# single-exposure single-HDU files
+NUMBERING_SCHEME = -0000000-0
+
+# Path of mask config file
+MASK_CONFIG_PATH = $SP_CONFIG/config_onthefly.mask
+
+# External mask file flag, use if True, otherwise ignore
+USE_EXT_FLAG = True
+
+# External star catalogue flag, use external cat if True,
+# obtain from online catalogue if False
+USE_EXT_STAR = False
+
+# File name prefix for the output flag files (optional)
+PREFIX = pipeline
diff --git a/example/cfis/config_exp_Pi.ini b/example/cfis/config_exp_Pi.ini
new file mode 100644
index 000000000..12056341b
--- /dev/null
+++ b/example/cfis/config_exp_Pi.ini
@@ -0,0 +1,82 @@
+# ShapePipe configuration file for single-HDU single-exposure images.
+# PSFex PSF model; interpolation for validation.
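+#
+# A typical invocation (assumed setup: $SP_RUN set to the run directory and
+# $SP_CONFIG pointing to the directory containing this file) would be:
+#   shapepipe_run -c $SP_CONFIG/config_exp_Pi.ini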
+ + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_exp_Pi + +# Add date and time to RUN_NAME, optional, default: True +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = psfex_interp_runner + + +# Run mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN +INPUT_DIR = . + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +[PSFEX_INTERP_RUNNER] + +# Use 20% sample for PSF validation +FILE_PATTERN = star_split_ratio_80, star_split_ratio_20, psfex_cat + +FILE_EXT = .psf, .fits, .cat + +NUMBERING_SCHEME = -0000000-0 + +# Run mode for psfex interpolation: +# CLASSIC: 'classical' run, interpolate to object positions +# MULTI-EPOCH: interpolate for multi-epoch images +# VALIDATION: validation for single-epoch images +MODE = VALIDATION + +# Column names of position parameters +POSITION_PARAMS = XWIN_IMAGE,YWIN_IMAGE + +# If True, measure and store ellipticity of the PSF (using moments) +GET_SHAPES = True + +# Minimum number of stars per CCD for PSF model to be computed +STAR_THRESH = 22 + +# Maximum chi^2 for PSF model to be computed on CCD +CHI2_THRESH = 2 diff --git a/example/cfis/config_exp_SpMh.ini b/example/cfis/config_exp_SpMh.ini index 76f87ddd7..add375bfd 100644 --- a/example/cfis/config_exp_SpMh.ini +++ b/example/cfis/config_exp_SpMh.ini @@ -57,13 +57,15 @@ TIMEOUT = 96:00:00 INPUT_DIR = last:get_images_runner_run_2 +FILE_PATTERN = image, weight, flag + # Matches compressed single-exposure files FILE_EXT = .fitsfz, .fitsfz, .fitsfz NUMBERING_SCHEME = -0000000 # OUTPUT_SUFFIX, actually file name prefixes. -# Expected keyword "flag" will lead to a behavior where the data are save as int. +# Expected keyword "flag" will lead to a behavior where the data are saved as int. # The code also expects the image data to use the "image" suffix # (default value in the pipeline). OUTPUT_SUFFIX = image, weight, flag diff --git a/example/cfis/config_exp_psfex.ini b/example/cfis/config_exp_psfex.ini new file mode 100644 index 000000000..a5ac5d8a1 --- /dev/null +++ b/example/cfis/config_exp_psfex.ini @@ -0,0 +1,177 @@ +# ShapePipe configuration file for single-exposures. PSFex PSF model. +# Process exposures after masking, from star detection to PSF model. 
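+#
+# The module chain below runs detection (sextractor_runner), star selection
+# (setools_runner), PSF modelling (psfex_runner), and PSF validation
+# (psfex_interp_runner) on each single-exposure single-HDU image.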
+
+
+## Default ShapePipe options
+[DEFAULT]
+
+# verbose mode (optional), default: True, print messages on terminal
+VERBOSE = True
+
+# Name of run (optional) default: shapepipe_run
+RUN_NAME = run_sp_exp_SxSePsfPi
+#RUN_NAME = run_sp_exp_SxSePsf
+
+# Add date and time to RUN_NAME, optional, default: True
+; RUN_DATETIME = False
+
+
+## ShapePipe execution options
+[EXECUTION]
+
+# Module name, single string or comma-separated list of valid module runner names
+MODULE = sextractor_runner, setools_runner, psfex_runner, psfex_interp_runner
+
+
+# Run mode, SMP or MPI
+MODE = SMP
+
+
+## ShapePipe file handling options
+[FILE]
+
+# Log file master name, optional, default: shapepipe
+LOG_NAME = log_sp
+
+# Runner log file name, optional, default: shapepipe_runs
+RUN_LOG_NAME = log_run_sp
+
+# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN
+INPUT_DIR = $SP_RUN/output
+
+# Output directory
+OUTPUT_DIR = $SP_RUN/output
+
+
+## ShapePipe job handling options
+[JOB]
+
+# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial
+SMP_BATCH_SIZE = 16
+
+# Timeout value (optional), default is None, i.e. no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+[SEXTRACTOR_RUNNER]
+
+# Input from two modules
+INPUT_DIR = last:split_exp_runner, run_sp_Ma_exp:mask_runner
+
+# Read pipeline flag files created by mask module
+FILE_PATTERN = image, weight, pipeline_flag
+
+NUMBERING_SCHEME = -0000000-0
+
+# SExtractor executable path
+EXEC_PATH = sex
+
+# SExtractor configuration files
+DOT_SEX_FILE = $SP_CONFIG/default_exp.sex
+DOT_PARAM_FILE = $SP_CONFIG/default.param
+DOT_CONV_FILE = $SP_CONFIG/default.conv
+
+# Use input weight image if True
+WEIGHT_IMAGE = True
+
+# Use input flag image if True
+FLAG_IMAGE = True
+
+# Use input PSF file if True
+PSF_FILE = False
+
+# Use distinct image for detection (SExtractor in
+# dual-image mode) if True.
+DETECTION_IMAGE = False
+
+# Distinct weight image for detection (SExtractor
+# in dual-image mode)
+DETECTION_WEIGHT = False
+
+# True if photometry zero-point is to be read from exposure image header
+ZP_FROM_HEADER = True
+
+# If ZP_FROM_HEADER is True, zero-point key name
+ZP_KEY = PHOTZP
+
+# Background information from image header.
+# If BKG_FROM_HEADER is True, background value will be read from header.
+# In that case, the value of BACK_TYPE will be set automatically to MANUAL.
+# This is used e.g. for the LSB images.
+BKG_FROM_HEADER = False
+# LSB images:
+# BKG_FROM_HEADER = True
+
+# If BKG_FROM_HEADER is True, background value key name
+# LSB images:
+#BKG_KEY = IMMODE
+
+# Type of image check (optional), default not used, can be a list of
+# BACKGROUND, BACKGROUND_RMS, MINIBACKGROUND, MINIBACK_RMS, -BACKGROUND,
+# FILTERED, OBJECTS, -OBJECTS, SEGMENTATION, APERTURES
+CHECKIMAGE = BACKGROUND
+
+# File name suffix for the output sextractor files (optional) SUFFIX = tile
+SUFFIX = sexcat
+
+## Post-processing
+
+# Not required for single exposures
+MAKE_POST_PROCESS = FALSE
+
+
+[SETOOLS_RUNNER]
+
+INPUT_DIR = last:sextractor_runner
+
+# Note: Make sure this does not match the SExtractor background images
+# (sexcat_background*)
+FILE_PATTERN = sexcat
+
+NUMBERING_SCHEME = -0000000-0
+
+# SETools config file
+SETOOLS_CONFIG_PATH = $SP_CONFIG/star_selection.setools
+
+
+[PSFEX_RUNNER]
+
+# Use 80% sample for PSF model
+FILE_PATTERN = star_split_ratio_80
+
+NUMBERING_SCHEME = -0000000-0
+
+# Path to executable for the PSF model (optional)
+EXEC_PATH = psfex
+
+# Default psfex configuration file
+DOT_PSFEX_FILE = $SP_CONFIG/default.psfex
+
+[PSFEX_INTERP_RUNNER]
+
+# Use 20% sample for PSF validation
+FILE_PATTERN = star_split_ratio_80, star_split_ratio_20, psfex_cat
+
+FILE_EXT = .psf, .fits, .cat
+
+NUMBERING_SCHEME = -0000000-0
+
+# Run mode for psfex interpolation:
+# CLASSIC: 'classical' run, interpolate to object positions
+# MULTI-EPOCH: interpolate for multi-epoch images
+# VALIDATION: validation for single-epoch images
+MODE = VALIDATION
+
+# Column names of position parameters
+POSITION_PARAMS = XWIN_IMAGE,YWIN_IMAGE
+
+# If True, measure and store ellipticity of the PSF (using moments)
+GET_SHAPES = True
+
+# Minimum number of stars per CCD for PSF model to be computed
+STAR_THRESH = 22
+
+# Maximum chi^2 for PSF model to be computed on CCD
+CHI2_THRESH = 2
diff --git a/example/cfis/config_get_tiles_vos_headers.ini b/example/cfis/config_get_tiles_vos_headers.ini
index e28347ebf..4b79a2493 100644
--- a/example/cfis/config_get_tiles_vos_headers.ini
+++ b/example/cfis/config_get_tiles_vos_headers.ini
@@ -69,7 +69,7 @@ NUMBERING_SCHEME =
 # Input path where original images are stored. Can be local path or vos url.
# Single string or list of strings -INPUT_PATH = vos:cfis/tiles_DR3 +INPUT_PATH = vos:cfis/tiles_DR5 # Input file pattern including tile number as dummy template INPUT_FILE_PATTERN = CFIS.000.000.r diff --git a/example/cfis/config_make_cat_psfex.ini b/example/cfis/config_make_cat_psfex.ini index fdf5414b5..a7407d990 100644 --- a/example/cfis/config_make_cat_psfex.ini +++ b/example/cfis/config_make_cat_psfex.ini @@ -55,7 +55,7 @@ TIMEOUT = 96:00:00 [MAKE_CAT_RUNNER] # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = last:sextractor_runner_run_1, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards diff --git a/example/cfis/config_merge_sep_cats_template.ini b/example/cfis/config_merge_sep_cats_template.ini index e14e5fe74..36b99e8da 100644 --- a/example/cfis/config_merge_sep_cats_template.ini +++ b/example/cfis/config_merge_sep_cats_template.ini @@ -35,7 +35,6 @@ RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN INPUT_DIR = ./output/run_sp_tile_ngmix_Ng1u/ngmix_runner/output -#, ./output/run_sp_tile_ngmix_Ng1u/galsim_shapes_v2_runner/output # Output directory OUTPUT_DIR = ./output @@ -58,11 +57,9 @@ TIMEOUT = 96:00:00 # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards FILE_PATTERN = ngmix -#, galsim # FILE_EXT (optional) list of string extensions to identify input files FILE_EXT = .fits -#, .fits # Numbering convention, string that exemplifies a numbering pattern. NUMBERING_SCHEME = -000-000 diff --git a/example/cfis/config_tile_Ma_onthefly.ini b/example/cfis/config_tile_Ma_onthefly.ini new file mode 100644 index 000000000..1b1fbbbea --- /dev/null +++ b/example/cfis/config_tile_Ma_onthefly.ini @@ -0,0 +1,82 @@ +# ShapePipe configuration file for masking of tiles + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = True + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_tile_Ma + +# Add date and time to RUN_NAME, optional, default: False +; RUN_DATETIME = False + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = mask_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN/output + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 8 + +# Timeout value (optional), default is None, i.e. 
no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+### Mask tiles
+[MASK_RUNNER]
+
+# Input directory, containing input files, single string or list of names
+INPUT_DIR = last:get_images_runner_run_1, last:uncompress_fits_runner
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+# Input file pattern(s), list of strings with length matching number of expected input file types
+# Cannot contain wild cards
+FILE_PATTERN = CFIS_image, CFIS_weight
+
+# FILE_EXT (optional) list of string extensions to identify input files
+FILE_EXT = .fits, .fits
+
+# Path of mask config file
+MASK_CONFIG_PATH = $SP_CONFIG/config_tile_onthefly.mask
+
+# External mask file flag, use if True, otherwise ignore
+USE_EXT_FLAG = False
+
+# External star catalogue flag, use external cat if True,
+# obtain from online catalogue if False
+USE_EXT_STAR = False
+
+# File name prefix for the output flag files (optional)
+PREFIX = pipeline
diff --git a/example/cfis/config_tile_Ng_template.ini b/example/cfis/config_tile_Ng_template.ini
index f9d733d71..5ad7aaf58 100644
--- a/example/cfis/config_tile_Ng_template.ini
+++ b/example/cfis/config_tile_Ng_template.ini
@@ -55,7 +55,7 @@ TIMEOUT = 96:00:00
 # Model-fitting shapes with ngmix
 [NGMIX_RUNNER]
 
-INPUT_DIR = last:sextractor_runner_run_1,last:X_interp_runner,last:vignetmaker_runner_run_2
+INPUT_DIR = run_sp_tile_Sx:sextractor_runner,last:X_interp_runner,last:vignetmaker_runner_run_2
 
 FILE_PATTERN = sexcat, image_vignet, background_vignet, galaxy_psf, weight_vignet, flag_vignet
 
@@ -75,25 +75,3 @@ PIXEL_SCALE = 0.186
 ID_OBJ_MIN = X
 
 ID_OBJ_MAX = X
-
-
-# Moment-based (KSB) shapes with galsim
-[GALSIM_SHAPES_V2_RUNNER]
-
-INPUT_DIR = last:sextractor_runner_run_2, last:vignetmaker_runner_run_1, last:X_interp_runner,last:vignetmaker_runner_run_2
-
-FILE_PATTERN = sexcat, weight_vignet, image_vignet, background_vignet, galaxy_psf, weight_vignet, flag_vignet
-
-FILE_EXT = .fits, .fits, .sqlite, .sqlite, .sqlite, .sqlite, .sqlite
-
-# NUMBERING_SCHEME (optional) string with numbering pattern for input files
-NUMBERING_SCHEME = -000-000
-
-# Multi-epoch mode: Path to file with single-exposure WCS header information
-LOG_WCS = $SP_RUN/output/log_exp_headers.sqlite
-
-# Magnitude zero-point
-MAG_ZP = 30.0
-
-ID_OBJ_MIN = X
-ID_OBJ_MAX = X
diff --git a/example/cfis/config_tile_PiViSmVi_canfar.ini b/example/cfis/config_tile_PiViSmVi_canfar.ini
new file mode 100644
index 000000000..51900b525
--- /dev/null
+++ b/example/cfis/config_tile_PiViSmVi_canfar.ini
@@ -0,0 +1,188 @@
+# ShapePipe configuration file for tile, from detection up to shape measurement.
+# PSFEx PSF model.
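+#
+# The module chain below runs: PSF model interpolation to galaxy positions
+# (psfex_interp_runner), vignets of the tile weights (vignetmaker_runner, run 1),
+# star/galaxy separation via the spread model (spread_model_runner), and
+# multi-epoch vignets at single-exposure positions (vignetmaker_runner, run 2).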
+
+
+## Default ShapePipe options
+[DEFAULT]
+
+# verbose mode (optional), default: True, print messages on terminal
+VERBOSE = True
+
+# Name of run (optional) default: shapepipe_run
+RUN_NAME = run_sp_tile_PiViSmVi
+
+# Add date and time to RUN_NAME, optional, default: False
+; RUN_DATETIME = False
+
+
+## ShapePipe execution options
+[EXECUTION]
+
+# Module name, single string or comma-separated list of valid module runner names
+#MODULE = psfex_interp_runner,
+
+MODULE = psfex_interp_runner, vignetmaker_runner, spread_model_runner,
+         vignetmaker_runner
+
+# Parallel processing mode, SMP or MPI
+MODE = SMP
+
+
+## ShapePipe file handling options
+[FILE]
+
+# Log file master name, optional, default: shapepipe
+LOG_NAME = log_sp
+
+# Runner log file name, optional, default: shapepipe_runs
+RUN_LOG_NAME = log_run_sp
+
+# Input directory, containing input files, single string or list of names
+INPUT_DIR = .
+
+# Output directory
+OUTPUT_DIR = $SP_RUN/output
+
+
+## ShapePipe job handling options
+[JOB]
+
+# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial
+SMP_BATCH_SIZE = 16
+
+# Timeout value (optional), default is None, i.e. no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+[PSFEX_INTERP_RUNNER]
+
+INPUT_DIR = run_sp_tile_Sx:sextractor_runner
+
+FILE_PATTERN = sexcat
+
+FILE_EXT = .fits
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+# Run mode for psfex interpolation:
+# CLASSIC: 'classical' run, interpolate to object positions
+# MULTI-EPOCH: interpolate for multi-epoch images
+# VALIDATION: validation for single-epoch images
+MODE = MULTI-EPOCH
+
+# Column names of position parameters
+POSITION_PARAMS = XWIN_WORLD,YWIN_WORLD
+
+# If True, measure and store ellipticity of the PSF
+GET_SHAPES = True
+
+# Number of stars threshold
+STAR_THRESH = 20
+
+# chi^2 threshold
+CHI2_THRESH = 2
+
+# Multi-epoch mode parameters
+
+ME_DOT_PSF_DIR = all:psfex_runner
+
+# Input psf file pattern
+ME_DOT_PSF_PATTERN = star_split_ratio_80
+
+# Multi-epoch mode: Path to file with single-exposure WCS header information
+ME_LOG_WCS = $SP_RUN/output/log_exp_headers.sqlite
+
+
+# Create vignets for tile weights
+[VIGNETMAKER_RUNNER_RUN_1]
+
+INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:uncompress_fits_runner
+
+FILE_PATTERN = sexcat, CFIS_weight
+
+FILE_EXT = .fits, .fits
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+MASKING = False
+MASK_VALUE = 0
+
+# Run mode:
+# CLASSIC: extract vignets at object positions on the tile
+# MULTI-EPOCH: extract vignets at object positions on the contributing single-exposure images
+MODE = CLASSIC
+
+# Coordinate frame type, one in PIX (pixel frame), SPHE (spherical coordinates)
+COORD = PIX
+POSITION_PARAMS = XWIN_IMAGE,YWIN_IMAGE
+
+# Vignet size in pixels
+STAMP_SIZE = 51
+
+# Output file name prefix, file name is <PREFIX>_vignet.fits
+PREFIX = weight
+
+
+[SPREAD_MODEL_RUNNER]
+
+INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:psfex_interp_runner, last:vignetmaker_runner_run_1
+
+FILE_PATTERN = sexcat, galaxy_psf, weight_vignet
+
+FILE_EXT = .fits, .sqlite, .fits
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+# Pixel scale in arcsec
+PIXEL_SCALE = 0.186
+
+# Output mode:
+# new: create a new catalog with: [number, mag, sm, sm_err]
+# add: create a copy of the input SExtractor catalogue with the columns
+#      sm and sm_err
+OUTPUT_MODE = new
+
+
+[VIGNETMAKER_RUNNER_RUN_2]
+
+# Create multi-epoch vignets for tiles corresponding to
+# positions on single-exposures
+
+INPUT_DIR = run_sp_tile_Sx:sextractor_runner
+
+FILE_PATTERN = sexcat
+
+FILE_EXT = .fits
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+MASKING = False
+MASK_VALUE = 0
+
+# Run mode:
+# CLASSIC: extract vignets at object positions on the tile
+# MULTI-EPOCH: extract vignets at object positions on the contributing single-exposure images
+MODE = MULTI-EPOCH
+
+# Coordinate frame type, one in PIX (pixel frame), SPHE (spherical coordinates)
+COORD = SPHE
+POSITION_PARAMS = XWIN_WORLD,YWIN_WORLD
+
+# Vignet size in pixels
+STAMP_SIZE = 51
+
+# Output file name prefix, file name is <PREFIX>vignet.fits
+PREFIX =
+
+# Additional parameters for path and file pattern corresponding to single-exposure
+# run outputs
+ME_IMAGE_DIR = last:split_exp_runner, last:split_exp_runner, last:split_exp_runner, all:sextractor_runner
+ME_IMAGE_PATTERN = flag, image, weight, background
+ME_LOG_WCS = $SP_RUN/output/log_exp_headers.sqlite
diff --git a/example/cfis/config_tile_Sx.ini b/example/cfis/config_tile_Sx.ini
new file mode 100644
index 000000000..a079c2488
--- /dev/null
+++ b/example/cfis/config_tile_Sx.ini
@@ -0,0 +1,117 @@
+# ShapePipe configuration file for tile detection
+
+
+## Default ShapePipe options
+[DEFAULT]
+
+# verbose mode (optional), default: True, print messages on terminal
+VERBOSE = True
+
+# Name of run (optional) default: shapepipe_run
+RUN_NAME = run_sp_tile_Sx
+
+# Add date and time to RUN_NAME, optional, default: True
+; RUN_DATETIME = False
+
+
+## ShapePipe execution options
+[EXECUTION]
+
+# Module name, single string or comma-separated list of valid module runner names
+MODULE = sextractor_runner
+
+
+# Run mode, SMP or MPI
+MODE = SMP
+
+
+## ShapePipe file handling options
+[FILE]
+
+# Log file master name, optional, default: shapepipe
+LOG_NAME = log_sp
+
+# Runner log file name, optional, default: shapepipe_runs
+RUN_LOG_NAME = log_run_sp
+
+# Input directory, containing input files, single string or list of names with length matching FILE_PATTERN
+INPUT_DIR = $SP_RUN/output
+
+# Output directory
+OUTPUT_DIR = $SP_RUN/output
+
+
+## ShapePipe job handling options
+[JOB]
+
+# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial
+SMP_BATCH_SIZE = 2
+
+# Timeout value (optional), default is None, i.e. no timeout limit applied
+TIMEOUT = 96:00:00
+
+
+## Module options
+
+[SEXTRACTOR_RUNNER]
+
+INPUT_DIR = last:get_images_runner_run_1, last:uncompress_fits_runner, run_sp_Ma_tile:mask_runner
+
+FILE_PATTERN = CFIS_image, CFIS_weight, pipeline_flag
+
+FILE_EXT = .fits, .fits, .fits
+
+# NUMBERING_SCHEME (optional) string with numbering pattern for input files
+NUMBERING_SCHEME = -000-000
+
+# SExtractor executable path
+EXEC_PATH = sex
+
+# SExtractor configuration files
+DOT_SEX_FILE = $SP_CONFIG/default_tile.sex
+DOT_PARAM_FILE = $SP_CONFIG/default.param
+DOT_CONV_FILE = $SP_CONFIG/default.conv
+
+# Use input weight image if True
+WEIGHT_IMAGE = True
+
+# Use input flag image if True
+FLAG_IMAGE = True
+
+# Use input PSF file if True
+PSF_FILE = False
+
+# Use distinct image for detection (SExtractor in
+# dual-image mode) if True
+DETECTION_IMAGE = False
+
+# Distinct weight image for detection (SExtractor
+# in dual-image mode)
+DETECTION_WEIGHT = False
+
+ZP_FROM_HEADER = False
+
+BKG_FROM_HEADER = False
+
+# Type of image check (optional), default not used, can be a list of
+# BACKGROUND, BACKGROUND_RMS, MINIBACKGROUND,
+# MINIBACK_RMS, -BACKGROUND, FILTERED,
+# OBJECTS, -OBJECTS, SEGMENTATION, APERTURES
+CHECKIMAGE = BACKGROUND
+
+# File name suffix for the output sextractor files (optional)
+SUFFIX = sexcat
+
+## Post-processing
+
+# Necessary for tiles, to enable multi-exposure processing
+MAKE_POST_PROCESS = True
+
+# Multi-epoch mode: Path to file with single-exposure WCS header information
+LOG_WCS = $SP_RUN/output/log_exp_headers.sqlite
+
+# World coordinate keywords, SExtractor output. Format: KEY_X,KEY_Y
+WORLD_POSITION = XWIN_WORLD,YWIN_WORLD
+
+# Pixel range of a CCD data section. Format: x_min,x_max,y_min,y_max
+CCD_SIZE = 33,2080,1,4612
diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param
index 6372a13e8..a70de962d 100644
--- a/example/cfis/final_cat.param
+++ b/example/cfis/final_cat.param
@@ -76,12 +76,23 @@ NGMIX_FLUX_ERR_2M
 NGMIX_FLUX_ERR_2P
 NGMIX_FLUX_ERR_NOSHEAR
 
-# magnitude, mainly for plots
+# magnitudes
 MAG_AUTO
-
-# SNR from SExtractor, used for cuts on GALSIM shapes
+MAGERR_AUTO
+MAG_WIN
+MAGERR_WIN
+FLUX_AUTO
+FLUXERR_AUTO
+FLUX_APER
+FLUXERR_APER
+FLUX_RADIUS
+
+# SNR from SExtractor
 SNR_WIN
 
+FWHM_IMAGE
+FWHM_WORLD
+
 # PSF size measured on original image
 NGMIX_T_PSFo_NOSHEAR
diff --git a/install_shapepipe b/install_shapepipe
index c370d91d0..e6ac4ad20 100755
--- a/install_shapepipe
+++ b/install_shapepipe
@@ -25,20 +25,6 @@ libpng_ver="1.6.37"
 mpi4py_ver="3.1.3"
 openblas_ver="0.3.18"
 
-# SExtractor Package
-sex_url="https://github.com/astromatic/sextractor/archive/2.25.0.tar.gz"
-sex_tar="2.25.0.tar.gz"
-sex_dir="sextractor-2.25.0"
-sex_bin="sex"
-sex_version=TRUE
-
-# PSFEx Package
-psfex_url="https://github.com/astromatic/psfex/archive/3.21.1.tar.gz"
-psfex_tar="3.21.1.tar.gz"
-psfex_dir="psfex-3.21.1"
-psfex_bin="psfex"
-psfex_version=TRUE
-
 # WeightWatcher Package
 ww_url="http://snapshot.debian.org/archive/debian/20171211T160522Z/pool/main/w/weightwatcher/weightwatcher_1.12.orig.tar.gz"
 ww_tar="weightwatcher_1.12.orig.tar.gz"
@@ -84,8 +70,6 @@ ENV_NAME='shapepipe'
 BUILD_ENV=TRUE
 DEVELOP=FALSE
 VOS=FALSE
-INSTALL_SEX=TRUE
-INSTALL_PSFEX=TRUE
 INSTALL_WW=TRUE
 INSTALL_CDSCLIENT=TRUE
 ATLAS_SEARCH=FALSE
@@ -105,66 +89,6 @@ download_package() {
   rm $2
 }
 
-# Function to build SExtractor with OpenBLAS
-build_sex_blas() {
-  astromatic_setup
-  cd $BUILD_DIR/$1
-  ./autogen.sh
-  ./configure --prefix=$CONDA_PREFIX --enable-openblas --with-fftw-libdir=$FFTW_LIB \
-
--with-fftw-incdir=$FFTW_INC --with-openblas-libdir=$BLAS_LIB \ - --with-openblas-incdir=$BLAS_INC --quiet - make -j --quiet - make install -} - -# Function to build SExtractor with ATLAS -build_sex_atlas() { - astromatic_setup - cd $BUILD_DIR/$1 - ./autogen.sh - if [ "$ATLAS_SEARCH" == TRUE ] - then - ./configure --prefix=$CONDA_PREFIX --with-fftw-libdir=$FFTW_LIB \ - --with-fftw-incdir=$FFTW_INC --quiet - else - ./configure --prefix=$CONDA_PREFIX --with-fftw-libdir=$FFTW_LIB \ - --with-fftw-incdir=$FFTW_INC --with-atlas-libdir=$ATLAS_LIB \ - --with-atlas-incdir=$ATLAS_INC --quiet - fi - make -j --quiet - make install -} - -# Function to build PSFEx with OpenBLAS -build_psfex_blas() { - astromatic_setup - cd $BUILD_DIR/$1 - ./autogen.sh - ./configure --prefix=$CONDA_PREFIX --enable-openblas --enable-plplot=no \ - --with-fftw-libdir=$FFTW_LIB --with-fftw-incdir=$FFTW_INC \ - --with-openblas-libdir=$BLAS_LIB --with-openblas-incdir=$BLAS_INC --quiet - make -j --quiet - make install -} - -# Function to build PSFEx with ATLAS -build_psfex_atlas() { - astromatic_setup - cd $BUILD_DIR/$1 - ./autogen.sh - if [ "$ATLAS_SEARCH" == TRUE ] - then - ./configure --prefix=$CONDA_PREFIX --enable-plplot=no \ - --with-fftw-libdir=$FFTW_LIB --with-fftw-incdir=$FFTW_INC --quiet - else - ./configure --prefix=$CONDA_PREFIX --enable-plplot=no \ - --with-fftw-libdir=$FFTW_LIB --with-fftw-incdir=$FFTW_INC \ - --with-atlas-libdir=$ATLAS_LIB --with-atlas-incdir=$ATLAS_INC --quiet - fi - make -j --quiet - make install -} - # Function to build a standard package build_standard() { cd $BUILD_DIR/$1 @@ -193,7 +117,7 @@ check_conda() { CONDA_VERSION_MAJOR=$(cut -d'.' -f1 <<<$CONDA_VERSION) CONDA_VERSION_MINOR=$(cut -d'.' -f2 <<<$CONDA_VERSION) CONDA_VERSION_PATCH=$(cut -d'.' -f3 <<<$CONDA_VERSION) - CONDA_SH=/etc/profile.d/conda.sh + CONDA_SH=/opt/conda/etc/profile.d/conda.sh # Check Conda major version if [ "$CONDA_VERSION_MAJOR" -lt 4 ] then @@ -290,8 +214,6 @@ start() { # Function to report package versions packages() { - check_binary2 $sex_bin "SExtractor" $sex_version $INSTALL_SEX - check_binary2 $psfex_bin "PSFEx" $psfex_version $INSTALL_PSFEX check_binary2 $ww_bin "WeightWatcher" $ww_version $INSTALL_WW check_binary2 $cdsclient_bin "CDSclient" $cdsclient_version $INSTALL_CDSCLIENT check_binary2 $mpi_bin "MPI" $mpi_version $USE_MPI $MPI_ROOT @@ -332,8 +254,6 @@ package_report() { then activate_shapepipe fi - INSTALL_SEX=FALSE - INSTALL_PSFEX=FALSE INSTALL_WW=FALSE INSTALL_CDSCLIENT=FALSE USE_MPI=FALSE @@ -381,8 +301,6 @@ setup() { check_status 'Operating System:' $SYSOS check_status 'Conda Version:' $CONDA_VERSION check_status 'Build Conda Environment:' $BUILD_ENV $ENV_NAME - check_status 'Install SExtractor:' $INSTALL_SEX - check_status 'Install PSFEx:' $INSTALL_PSFEX check_status 'Install WeightWatcher:' $INSTALL_WW check_status 'Install CDSclient:' $INSTALL_CDSCLIENT check_status 'Use MPI:' $USE_MPI $MPI_ROOT @@ -467,8 +385,6 @@ Options:\n \t--develop\t option to install additional development packages\n \t--vos\t\t option to install virtual observatory software\n \t--no-env\t do not build Conda environment\n -\t--no-sex\t do not build SExtractor\n -\t--no-psfex\t do not build PSFEx\n \t--no-ww\t\t do not build WeightWatcher\n \t--no-cds\t do not build CDSclient\n \t--no-exe\t do not build any system executables\n\n @@ -537,14 +453,6 @@ case $i in BUILD_ENV=FALSE shift ;; - --no-sex) - INSTALL_SEX=FALSE - shift - ;; - --no-psfex) - INSTALL_PSFEX=FALSE - shift - ;; --no-ww) INSTALL_WW=FALSE shift @@ -590,8 
+498,6 @@ case $i in shift ;; --no-exe) - INSTALL_SEX=FALSE - INSTALL_PSFEX=FALSE INSTALL_WW=FALSE INSTALL_CDSCLIENT=FALSE shift @@ -716,37 +622,6 @@ fi # BUILD EXTERNAL EXECUTABLES ############################################################################## -# Build SExtractor -if [ "$INSTALL_SEX" == TRUE ] && check_binary $sex_bin -then - report_progress 'SExtractor' - conda install -n $ENV_NAME -c conda-forge "fftw==${fftw_ver}" -y - download_package $sex_url $sex_tar - if [ "$use_atlas" == TRUE ] - then - build_sex_atlas $sex_dir - else - conda install -n $ENV_NAME -c conda-forge "openblas==${openblas_ver}" -y - build_sex_blas $sex_dir - fi -fi - -# Build PSFEx -if [ "$INSTALL_PSFEX" == TRUE ] && check_binary $psfex_bin -then - report_progress 'PSFEx' - conda install "libpng==${libpng_ver}" -y - conda install -n $ENV_NAME -c conda-forge "fftw==${fftw_ver}" -y - download_package $psfex_url $psfex_tar - if [ "$use_atlas" == TRUE ] - then - build_psfex_atlas $psfex_dir - else - conda install -n $ENV_NAME -c conda-forge "openblas==${openblas_ver}" -y - build_psfex_blas $psfex_dir - fi -fi - # Build WeightWatcher if [ "$INSTALL_WW" == TRUE ] && check_binary $ww_bin then diff --git a/nsswitch.conf b/nsswitch.conf new file mode 100644 index 000000000..22d8d9949 --- /dev/null +++ b/nsswitch.conf @@ -0,0 +1,62 @@ +# +# /etc/nsswitch.conf +# +# An example Name Service Switch config file. This file should be +# sorted with the most-used services at the beginning. +# +# The entry '[NOTFOUND=return]' means that the search for an +# entry should stop if the search in the previous entry turned +# up nothing. Note that if the search failed due to some other reason +# (like no NIS server responding) then the search continues with the +# next entry. +# +# Valid entries include: +# +# nisplus Use NIS+ (NIS version 3) +# nis Use NIS (NIS version 2), also called YP +# dns Use DNS (Domain Name Service) +# files Use the local files +# db Use the local database (.db) files +# compat Use NIS on compat mode +# hesiod Use Hesiod for user lookups +# [NOTFOUND=return] Stop searching if not found so far +# + +# To use db, put the "db" in front of "files" for entries you want to be +# looked up first in the databases +# +# Example: +#passwd: db files nisplus nis +#shadow: db files nisplus nis +#group: db files nisplus nis + +passwd: sss files +shadow: files sss +group: sss files + +#hosts: db files nisplus nis dns +hosts: files dns + +# Example - obey only what nisplus tells us... 
+#services: nisplus [NOTFOUND=return] files +#networks: nisplus [NOTFOUND=return] files +#protocols: nisplus [NOTFOUND=return] files +#rpc: nisplus [NOTFOUND=return] files +#ethers: nisplus [NOTFOUND=return] files +#netmasks: nisplus [NOTFOUND=return] files + +bootparams: nisplus [NOTFOUND=return] files + +ethers: files +netmasks: files +networks: files +protocols: files +rpc: files +services: files + +netgroup: nisplus + +publickey: nisplus + +automount: files nisplus +aliases: files nisplus diff --git a/scripts/jupyter/summary_run.ipynb b/scripts/jupyter/summary_run.ipynb new file mode 100644 index 000000000..f527e4159 --- /dev/null +++ b/scripts/jupyter/summary_run.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "130112a4-f2ca-4d26-b884-d8b054676f9c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 3\n", + "%reload_ext autoreload" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "196beca5-10a1-4cf5-9462-be145167cc70", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import shapepipe\n", + "from shapepipe.utilities.summary import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ca63c72d-212c-463e-a792-71efbac0b908", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Setting\n", + "patch = \"P7\"\n", + "\n", + "verbose = False" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "dcb5604c-d61f-4705-8295-63875455cadb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Load parameters\n", + "%run ~/shapepipe/scripts/python/summary_params_pre_v2" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e69b7dab-1fea-4fcc-a8d9-0720e1d628c3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Checking main directory = /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P7\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set job info for patch P7\n" + ] + } + ], + "source": [ + "jobs, list_tile_IDs = set_jobs_v2_pre_v2(patch, verbose)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2c3a9dde-cf88-493f-926e-7ae7e8e10916", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Initialize runtime dicionary.\n", + "par_runtime = init_par_runtime(list_tile_IDs)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1e9c1487-3cec-4394-9fcf-c12e92a0f984", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# No effect in notebook\n", + "#print_par_runtime(par_runtime, verbose=verbose)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b7c63a22-ead1-4d6a-b081-a74ade515439", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "module expected found miss_expl missing uniq_miss fr_found\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "# Start program\n", + "job_data.print_stats_header()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4720ae18-0633-4646-b392-b1b24e0294c3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " (Job 1)\n", + "get_images_runner_run_1 462 462 0 0 0.0 100.0%\n", + "find_exposures_runner 231 231 0 0 0.0 100.0%\n", + "get_images_runner_run_2 
537 0 0 537 179.0 0.0%\n" + ] + } + ], + "source": [ + "for key in \"1\":\n", + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime, indices=[0, 1])\n", + "\n", + " all_exposures = get_all_exposures(jobs[key]._paths_in_dir[1], verbose=verbose)\n", + " par_runtime[\"n_exposures\"] = len(all_exposures)\n", + " par_runtime[\"list_exposures\"] = all_exposures\n", + "\n", + " jobs[key].check_numbers(par_runtime, indices=[2])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f149f404-64e7-4d92-8f54-f300ed620130", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Update runtime dictionary with numbers of exposures\n", + "par_runtime = update_par_runtime_after_find_exp(par_runtime, all_exposures)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "78a9065f-8983-41cf-a34c-21892fc52dd2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Get all keys after \"1\"\n", + "keys = sorted(jobs.keys(), key=int)\n", + "_ = keys.pop(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "79e39954-1155-4ca3-b0b2-64bc5670db53", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " (Job 2)\n", + "uncompress_fits_runner 1268 1268 0 0 0.0 100.0%\n", + "merge_headers_runner 0 0 0 0 0.0 100.0%\n", + "split_exp_runner 137940 137940 0 0 0.0 100.0%\n", + " (Job 4)\n", + "mask_runner 1268 1268 0 0 0.0 100.0%\n", + " (Job 8)\n", + "mask_runner 45600 45600 0 0 0.0 100.0%\n", + " (Job 16)\n", + "sextractor_runner 2536 2536 0 0 0.0 100.0%\n", + " (Job 32)\n", + "sextractor_runner 91200 91200 0 0 0.0 100.0%\n", + "setools_runner 91200 91032 0 168 84.0 99.8%\n", + "psfex_runner 91200 91032 0 168 84.0 99.8%\n", + " (Job 64)\n", + "psfex_interp_runner 1268 1268 0 0 0.0 100.0%\n", + "vignetmaker_runner_run_1 1268 1268 0 0 0.0 100.0%\n", + "spread_model_runner 1268 1268 0 0 0.0 100.0%\n", + "vignetmaker_runner_run_2 5072 5072 0 0 0.0 100.0%\n", + " (Job 128)\n", + "ngmix_runner 1268 1225 0 43 43.0 96.6%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1217 0 51 51.0 96.0%\n", + "ngmix_runner 1268 1228 0 40 40.0 96.8%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + " (Job 256)\n", + "merge_sep_cats_runner 1268 0 0 1268 1268.0 0.0%\n", + "make_cat_runner 1268 0 0 1268 1268.0 0.0%\n", + " (Job 1024)\n", + "psfex_interp_runner 45600 41132 0 4468 4468.0 90.2%\n" + ] + } + ], + "source": [ + "for key in keys:\n", + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "b3d51a05-ecca-420b-b8b3-1fb2b1ec9fe3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " (Job 128)\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1265 0 3 3.0 99.8%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n", + "ngmix_runner 1268 1268 0 0 0.0 100.0%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n" + ] + } + ], + "source": [ + "## Update some runs\n", + "for key in [\"128\"]:\n", + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 64, + "id": "67b50a61-e3cc-4559-941d-f39c6a200294", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " (Job 128)\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1265 0 3 3.0 99.8%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n", + "ngmix_runner 1268 1268 0 0 0.0 100.0%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n" + ] + } + ], + "source": [ + "for key in [\"128\"]:\n", + " jobs[key].print_intro()\n", + " \n", + " \n", + " jobs[key].check_numbers(par_runtime=par_runtime)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "affa8293-daf9-4d2b-9215-fe19f8e2c1e2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "session = Session()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "269496d1-cd89-4d13-a5e4-41b897669e22", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ids = [session[\"id\"] for session in session.fetch(kind=\"headless\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "80af8dff-98c7-4db4-8bcc-06936e1875cf", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "This event loop is already running", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_69/559116804.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdestroy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/skaha/session.py\u001b[0m in \u001b[0;36mdestroy\u001b[0;34m(self, id)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[0marguments\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"url\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mloop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_event_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 266\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_until_complete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marguments\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 267\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mDict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midentity\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/asyncio/base_events.py\u001b[0m in \u001b[0;36mrun_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 561\u001b[0m \"\"\"\n\u001b[1;32m 562\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 563\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_runnung\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 564\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0mnew_task\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfuture\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/asyncio/base_events.py\u001b[0m in \u001b[0;36m_check_runnung\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 521\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_runnung\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 522\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_running\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This event loop is already running'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 524\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_running_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m raise RuntimeError(\n", + "\u001b[0;31mRuntimeError\u001b[0m: This event loop is already running" + ] + } + ], + "source": [ + "session.destroy(ids[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66a3ed14-8aaf-4028-b933-10ecb7376d68", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/python/get_number_objects.py b/scripts/python/get_number_objects.py index 67cdb294d..0f3ae41b1 100755 --- a/scripts/python/get_number_objects.py +++ b/scripts/python/get_number_objects.py @@ -17,7 +17,7 @@ from optparse import 
OptionParser from astropy.io import fits -from shapepipe.pipeline.run_log import get_last_dir +from shapepipe.pipeline.run_log import get_last_dir, get_all_dirs from shapepipe.utilities import cfis @@ -281,10 +281,21 @@ def main(argv=None): # Save command line arguments to log file f_log = cfis.log_command(argv, close_no_return=False) - module = 'sextractor_runner_run_1' pattern = 'sexcat' run_log_file = 'output/log_run_sp.txt' - last_dir = get_last_dir(run_log_file, module) + + # For v1 + #module = 'sextractor_runner_run_1' + + # For v2 + module = "sextractor_runner" + all_dir = get_all_dirs(run_log_file, module) + paths = [] + for path in all_dir: + if "run_sp_tile_Sx" in path: + paths.append(path) + paths = sorted(paths) + last_dir = paths[-1] file_list = glob.glob(f'{last_dir}/{pattern}*.fits') if len(file_list) == 0: diff --git a/scripts/python/link_to_exp_for_tile.py b/scripts/python/link_to_exp_for_tile.py new file mode 100755 index 000000000..929c90940 --- /dev/null +++ b/scripts/python/link_to_exp_for_tile.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- + +"""Script link_to_exp_for_tile.py + +:Description: Link to exposure and PSF catalogue + for a given tile. + +:Author: Martin Kilbinger + +""" + +import os +import sys +import re +import copy + +from optparse import OptionParser + + +class param: + """General class to store (default) variables + + """ + def __init__(self, **kwds): + self.__dict__.update(kwds) + + def print(self, **kwds): + print(self.__dict__) + + def var_list(self, **kwds): + return vars(self) + + +def params_default(): + """Params Default. + + Set default parameter values. + + Returns + ------- + class param + parameter values + + """ + p_def = param( + tile_base_dir = '.', + exp_base_dir = '.', + ) + + return p_def + + +def parse_options(p_def): + """Parse Options. + + Parse command line options. + + Parameters + ---------- + p_def: class param + parameter values + + Returns + ------- + list + command line options + command line str + + """ + usage = "%prog [OPTIONS]" + parser = OptionParser(usage=usage) + + # IO + parser.add_option( + '-i', + '--input_tile_dir', + dest='tile_base_dir', + type='string', + default=p_def.tile_base_dir, + help=f'input tile base directory, default=\'{p_def.tile_base_dir}\'' + ) + parser.add_option( + '-t', + '--tile_ID', + dest='tile_ID', + type='string', + help=f"input tile ID", + ) + parser.add_option( + '-I', + '--input_exp_dir', + dest='exp_base_dir', + type='string', + default=p_def.exp_base_dir, + help=f'input exposure base directory, default=\'{p_def.exp_base_dir}\'' + ) + parser.add_option( + '-v', + '--verbose', + dest='verbose', + action='store_true', + help='verbose output' + ) + + options, args = parser.parse_args() + + return options, args + + +def check_options(options): + """Check Options. + + Check command line options. + + Parameters + ---------- + options: tuple + Command line options + + Returns + ------- + bool + Result of option check. False if invalid option value. + + """ + return True + + +def update_param(p_def, options): + """Update Param. + + Return default parameter, updated and complemented according to options. 
+ + Parameters + ---------- + p_def: class param + parameter values + options: tuple + command line options + + Returns + ------- + class param + updated parameter values + + """ + param = copy.copy(p_def) + + # Update keys in param according to options values + for key in vars(param): + if key in vars(options): + setattr(param, key, getattr(options, key)) + + # Add remaining keys from options to param + for key in vars(options): + if not key in vars(param): + setattr(param, key, getattr(options, key)) + + # Do extra stuff if necessary + + return param + + +# TODO: move to cs_util +def matching_subdirs(base_dir, pattern): + + # Find all matching subdirectories + subdirs = [] + if os.path.exists(base_dir): + for entry in os.listdir(base_dir): + full_path = os.path.join(base_dir, entry) + if os.path.isdir(full_path) and entry.startswith(pattern): + subdirs.append(full_path) + else: + print(f"Warning: {base_dir} does not exist, continuing...") + + # Sort according to creation date + subdirs.sort(key=os.path.getctime) + + return subdirs + + +def get_tile_out_dir(tile_base_dir, tile_ID): + + tile_out_dir = f"{tile_base_dir}/{tile_ID}/output" + + return tile_out_dir + + +def get_exp_IDs(tile_base_dir, tile_ID, verbose=False): + + tile_out_dir = get_tile_out_dir(tile_base_dir, tile_ID) + + pattern = "run_sp_GitFeGie" + subdirs = matching_subdirs(tile_out_dir, pattern) + + if len(subdirs) == 0: + raise IOError( + f"No matching directory '{pattern}' in {tile_out_dir} found" + ) + if len(subdirs) != 1: + raise IOError( + f"Exactly one directory matching {pattern} in {tile_out_dir} " + + f"expected, not {len(subdirs)}" + ) + + # Replace dot with dash in tile ID + tile_ID_sp = re.sub(r"\.", "-", tile_ID) + exp_ID_file = ( + f"{subdirs[0]}/find_exposures_runner/output/" + + f"exp_numbers-{tile_ID_sp}.txt" + ) + + exp_IDs = [] + with open(exp_ID_file) as f_in: + for line in f_in: + name = line.strip() + # Remove any letter + ID = re.sub("[a-zA-Z]", "", name) + exp_IDs.append(ID) + + if verbose: + print("Exposures: ", exp_IDs) + return exp_IDs + + +def get_exp_single_HDU_IDs(exp_IDs, n_CPU): + + exp_shdu_IDs = [] + for exp_ID in exp_IDs: + for idx in range(n_CPU): + ID = f"{exp_ID}-{idx}" + exp_shdu_IDs.append(ID) + + return exp_shdu_IDs + + +def get_paths(exp_base_dir, exp_shdu_IDs, pattern): + + number = {} + paths = [] + for exp_shdu_ID in exp_shdu_IDs: + + name = f"{exp_base_dir}/{exp_shdu_ID}/output" + path = os.path.abspath(name) + subdirs = matching_subdirs(path, pattern) + n_subdirs = len(subdirs) + + if n_subdirs not in number: + number[n_subdirs] = 1 + else: + number[n_subdirs] += 1 + + if n_subdirs != 1: + msg = ( + f"Exactly one directory matching {pattern} in {path} expected," + + f" not {n_subdirs}" + ) + print(msg) + # More than one match: sort according to name = creation time + subdirs = sorted(subdirs) + if n_subdirs == 0: + continue + + # Append matching subdir; if more than one append newest + paths.append(f"{subdirs[-1]}") + + return paths, number + + +def create_links_paths(tile_base_dir, tile_ID, paths, verbose=False): + + tile_out_dir = get_tile_out_dir(tile_base_dir, tile_ID) + + for path in paths: + + head, tail = os.path.split(path) + src = path + dst = f"{tile_out_dir}/{tail}" + if os.path.exists(dst): + src_existing = os.readlink(dst) + if src_existing == src: + if verbose: + print(f"Warning: {src} <- {dst} already exists, no link created") + continue + else: + idx = 1 + dst_orig = dst + while True: + dst = f"{dst_orig}_{idx}" + if
os.path.exists(dst): + idx += 1 + else: + if verbose: + print(f"link {src} <- {dst}") + os.symlink(src, dst) + break + else: + if verbose: + print(f"link {src} <- {dst}") + os.symlink(src, dst) + + +def main(argv=None): + + # Set default parameters + p_def = params_default() + + # Command line options + options, args = parse_options(p_def) + + if check_options(options) is False: + return 1 + + param = update_param(p_def, options) + + tile_base_dir = param.tile_base_dir + exp_base_dir = param.exp_base_dir + tile_ID = param.tile_ID + n_CPU = 40 + verbose = param.verbose + + exp_IDs = get_exp_IDs(tile_base_dir, tile_ID, verbose=verbose) + exp_shdu_IDs = get_exp_single_HDU_IDs(exp_IDs, n_CPU) + + # Note: psfex P3 is mostly run_sp_exp_SxSePsf + patterns = ["run_sp_exp_SxSePsfPi"] #, "run_sp_exp_Pi"] + for pattern in patterns: + paths, number = get_paths(exp_base_dir, exp_shdu_IDs, pattern) + + create_links_paths(tile_base_dir, tile_ID, paths, verbose=verbose) + + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/python/merge_final_cat.py b/scripts/python/merge_final_cat.py index 1a160c449..076f5c935 100755 --- a/scripts/python/merge_final_cat.py +++ b/scripts/python/merge_final_cat.py @@ -20,8 +20,6 @@ from optparse import OptionParser -from tqdm import tqdm - from shapepipe.utilities import cfis diff --git a/scripts/python/stats_headless_canfar.py b/scripts/python/stats_headless_canfar.py new file mode 100755 index 000000000..1f728949d --- /dev/null +++ b/scripts/python/stats_headless_canfar.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +# Name: stats_headless_canfar.py + +# Caution: Does not show all running or pending +# headless jobs, for some reason. + +import sys +from skaha.session import Session + + +def main(argv=None): + + print( + "# Deprecated, does not show pending jobs; use stats_jobs_canfar.sh", + file=sys.stderr, + ) + + session = Session() + + n_headless = session.stats()["instances"]["headless"] + + print(n_headless) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/python/summary_params_pre_v2.py b/scripts/python/summary_params_pre_v2.py new file mode 100644 index 000000000..dc16b59a2 --- /dev/null +++ b/scripts/python/summary_params_pre_v2.py @@ -0,0 +1,242 @@ +# Parameters for summary run + +import os +from shapepipe.utilities.summary import * + +def init_par_runtime(list_tile_IDs): + + # Numbers updated at runtime + par_runtime = {} + + par_runtime["n_tile_IDs"] = len(list_tile_IDs) + par_runtime["list_tile_IDs"] = list_tile_IDs + + return par_runtime + + +def update_par_runtime_after_find_exp(par_runtime, all_exposures): + + n_CCD = 40 + + # Single-HDU single exposure images + par_runtime["n_shdus"] = get_par_runtime(par_runtime, "exposures") * n_CCD + par_runtime["list_shdus"] = get_all_shdus(all_exposures, n_CCD) + + ## For split_exp_runner, the output is image, weight, flag per single-HDU image + ## and a header per exposure.
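+ ## A worked example of this count, with illustrative numbers: 2 exposures + ## and n_CCD = 40 give n_shdus = 2 * 40 = 80, so 3 * 80 + 2 = 242 + ## split_exp_runner output files are expected.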
+ par_runtime["n_3*n_shdus+n_exposures"] = ( + 3 * get_par_runtime(par_runtime, "shdus") + + get_par_runtime(par_runtime, "exposures") + ) + + return par_runtime + + +def set_jobs_v2_pre_v2(patch, verbose): + """ Return information about shapepipe jobs + + """ + print(f"Set job info for patch {patch}") + + # Main input and output directory + path_main = f"{os.environ['HOME']}/cosmostat/v2/pre_v2/psfex/{patch}" + + # Logging + path = f"{path_main}/summary" + if not os.path.isdir(path): + os.mkdir(path) + log_file_name = f"{path}/summary_log.txt" + handlers = [ + logging.FileHandler(log_file_name, mode="w"), + logging.StreamHandler() + ] + logging.basicConfig( + level=logging.INFO, format="%(message)s", handlers=handlers + ) + + logging.info(f"Checking main directory = {path_main}") + + # Tile IDs + tile_ID_path = f"{path_main}/tile_numbers.txt" + + ## Tile IDs with dots + list_tile_IDs_dot = get_IDs_from_file(tile_ID_path) + + jobs = {} + + # Set the first job (retrieve images) + + # With "CFIS_" only the linked images are counted. The original + # ones do not match the IDdash pattern. + # If images were downloaded in several runs: + # - Only copy original images, then (re-)set links in SP numbering format + # - get_images_runner_run_[12] consistent + # - remove previous output dirs since only last is searched + jobs["1"] = job_data( + 1, + "run_sp_GitFeGie", + [ + "get_images_runner_run_1", + "find_exposures_runner", + "get_images_runner_run_2", + ], + ["tile_IDs", "tile_IDs", "exposures"], + pattern=["CFIS_", "", ""], + n_mult=[2, 1, 3], + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + jobs["2"] = job_data( + 2, + ["run_sp_Uz", "run_sp_exp_SpMh", "run_sp_exp_SpMh"], + ["uncompress_fits_runner", "merge_headers_runner", "split_exp_runner"], + ["tile_IDs", 0, "3*n_shdus+n_exposures"], + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + run_dir_mask_tiles = "run_sp_tile_Ma" + run_dir_mask_exp = "run_sp_exp_Ma" + mask_module_tiles = "mask_runner" + mask_module_exp = "mask_runner" + + jobs["4"] = job_data( + 4, + run_dir_mask_tiles, + [mask_module_tiles], + ["tile_IDs"], + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + jobs["8"] = job_data( + 8, + run_dir_mask_exp, + [mask_module_exp], + ["shdus"], + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + jobs["16"] = job_data( + 16, + "run_sp_tile_Sx", + ["sextractor_runner"], + ["tile_IDs"], + n_mult=2, + path_main=path_main, + path_left="tile_runs", + output_subdirs=[f"{tile_ID}/output" for tile_ID in list_tile_IDs_dot], + verbose=verbose, + ) + + # TODO 1 setools_runner output/rand_split + # TODO 2 add back Pi + jobs["32"] = job_data( + 32, + [ + "run_sp_exp_SxSePsf", + "run_sp_exp_SxSePsf", + "run_sp_exp_SxSePsf", + #"run_sp_exp_Pi" + ], + [ + "sextractor_runner", + "setools_runner", + "psfex_runner", + # "psfex_interp_runner"], + ], + "shdus", + n_mult=[2, 2, 2], # 1], + path_main=path_main, + path_left="exp_runs", + output_subdirs="shdus", + path_right="output", + verbose=verbose, + ) + + # For P3 + #jobs["33"] = job_data( + # 33, + # "run_sp_exp_Pi", + # ["psfex_interp_runner"], + # "shdus", + # path_main=path_main, + # path_left="exp_runs", + # output_subdirs="shdus", + # path_right="output", + # verbose=verbose, + #) + + jobs["64"] = job_data( + "64", + "run_sp_tile_PsViSmVi", + [ + "psfex_interp_runner", + "vignetmaker_runner_run_1", + "spread_model_runner", + "vignetmaker_runner_run_2", + ], + "tile_IDs", + n_mult=[1, 1, 1, 4], + path_main=path_main, + 
path_left="tile_runs", + output_subdirs=[f"{tile_ID}/output" for tile_ID in list_tile_IDs_dot], + verbose=verbose, + ) + + n_sh = 8 + run_dirs = [f"run_sp_tile_ngmix_Ng{idx+1}u" for idx in range(n_sh)] + output_path_missing_IDs = [ + f"{path_main}/summary/missing_job_128_ngmix_runner_{idx+1}.txt" for idx in range(n_sh) + ] + jobs["128"] = job_data( + "128", + run_dirs, + ["ngmix_runner"] * 8, + "tile_IDs", + path_main=path_main, + path_left="tile_runs", + output_subdirs=[f"{tile_ID}/output" for tile_ID in list_tile_IDs_dot], + output_path_missing_IDs=output_path_missing_IDs, + verbose=verbose, + ) + + jobs["256"] = job_data( + "256", + ["run_sp_Ms", "run_sp_Mc"], + ["merge_sep_cats_runner", "make_cat_runner"], + "tile_IDs", + path_main=path_main, + path_left="tile_runs", + output_subdirs=[f"{tile_ID}/output" for tile_ID in list_tile_IDs_dot], + verbose=verbose, + ) + + # Post-processing + jobs["512"] = job_data( + "512", + ["run_sp_combined_final"], + ["make_catalog_runner"], + "tile_IDs", + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + jobs["1024"] = job_data( + "1024", + "run_sp_combined_psf", + ["psfex_interp_runner"], + "shdus", + path_main=path_main, + path_left="output", + verbose=verbose, + ) + + return jobs, list_tile_IDs_dot diff --git a/scripts/python/summary_run.py b/scripts/python/summary_run.py new file mode 100755 index 000000000..331764b24 --- /dev/null +++ b/scripts/python/summary_run.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import os + +from shapepipe.utilities.summary import * + +from summary_params_pre_v2 import * + + +def main(argv=None): + + patch = argv[1] + + verbose = False + + jobs, list_tile_IDs_dot = set_jobs_v2_pre_v2(patch, verbose) + + list_tile_IDs = job_data.replace_dot_dash(list_tile_IDs_dot) + + # Numbers updated at runtime + par_runtime = init_par_runtime(list_tile_IDs) + + job_data.print_stats_header() + + for key in "1": + jobs[key].print_intro() + jobs[key].check_numbers(par_runtime=par_runtime, indices=[0, 1]) + + all_exposures = get_all_exposures(jobs[key]._paths_in_dir[1], verbose=True) + par_runtime["n_exposures"] = len(all_exposures) + par_runtime["list_exposures"] = all_exposures + + jobs[key].check_numbers(par_runtime, indices=[2]) + + par_runtime = update_par_runtime_after_find_exp(par_runtime, all_exposures) + + print_par_runtime(par_runtime, verbose=verbose) + + + # Get all keys after "1" + keys = sorted(jobs.keys(), key=int) + _ = keys.pop(0) + + for key in keys: + jobs[key].print_intro() + jobs[key].check_numbers(par_runtime=par_runtime) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/python/update_runs_log_file.py b/scripts/python/update_runs_log_file.py new file mode 100644 index 000000000..a08890117 --- /dev/null +++ b/scripts/python/update_runs_log_file.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +import re +import sys +import os + +# TODO: move to cs_util +def matching_subdirs(base_dir, pattern, tail=False): + + # Find all matching subdirectories + subdirs = [] + for entry in os.listdir(base_dir): + full_path = os.path.join(base_dir, entry) + if os.path.isdir(full_path): + found = False + + # Look for pattern at start or end + if pattern in entry: + + # Get full path or last part ("tail") + if not tail: + path = full_path + else: + head, tail = os.path.split(full_path) + path = tail + + # Remove postfix in case of multiple runs of same module + path = re.sub("_run_.\d?", "", path) + + # Append to result + subdirs.append(path) + + # Sort according to 
creation date + if not tail: + subdirs.sort(key=os.path.getctime) + + return subdirs + + +def get_module_runs(subdirs): + + all_runs = {} + for subdir in subdirs: + runs = matching_subdirs(subdir, "_runner", tail=True) + if len(runs) > 0: + all_runs[subdir] = runs + + return all_runs + + +def update_log_file(module_runs, log_name): + + with open(log_name, "w") as f_out: + for key in module_runs: + print(key, file=f_out, end=" ") + print(",".join(module_runs[key]), file=f_out) + + +def main(argv=None): + + # Set default parameters + #p_def = params_default() + + # Command line options + #options, args = parse_options(p_def) + + #if check_options(options) is False: + #return 1 + + #param = update_param(p_def, options) + + base_dir = "./output" + pattern = "run_sp_" + log_name = f"{base_dir}/log_run_sp.txt" + + subdirs = matching_subdirs(base_dir, pattern) + module_runs = get_module_runs(subdirs) + #save_prev(log_name) + update_log_file(module_runs, log_name) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/sh/combine_runs.bash b/scripts/sh/combine_runs.bash new file mode 100755 index 000000000..3b8a27824 --- /dev/null +++ b/scripts/sh/combine_runs.bash @@ -0,0 +1,201 @@ +#!/usr/bin/env bash + +# Name: combine_runs.bash +# Description: Create new shapepipe run directory with +# links to source files from combined existing runs +# Author: Martin Kilbinger + + +# Command line arguments + +## Default values +cat='final' +psf="mccd" + +## Help string +usage="Usage: $(basename "$0") [OPTIONS] +\n\nOptions:\n + -h\tthis message\n + -p, --psf MODEL\n + \tPSF model, allowed are 'psfex', 'mccd', 'setools', default='$psf'\n + -c, --cat TYPE\n + \tCatalogue type, allowed are 'final', 'flag_tile', 'flag_exp', \n + \t'psf', 'image', default='$cat'\n +" + +## Parse command line +while [ $# -gt 0 ]; do + case "$1" in + -h) + echo -ne $usage + exit 0 + ;; + -c|--cat) + cat="$2" + shift + ;; + -p|--psf) + psf="$2" + shift + ;; + *) + echo -ne $usage + exit 1 + ;; + esac + shift +done + + +## Check catalogue type option +if [ "$cat" != "final" ] \ + && [ "$cat" != "flag_tile" ] \ + && [ "$cat" != "flag_exp" ] \ + && [ "$cat" != "psf" ] \ + && [ "$cat" != "image" ]; then + echo "cat (option -c) needs to be 'final', 'flag_tile', 'flag_exp', 'psf', or 'image'" + exit 2 +fi + +## Check PSF model option +if [ "$psf" != "psfex" ] \ + && [ "$psf" != "mccd" ] \ + && [ "$psf" != "setools" ]; then + echo "PSF (option -p) needs to be 'psfex', 'mccd', or 'setools'" + exit 2 +fi + + +## Functions +function link_s () { + target=$1 + link_name=$2 + + if [ -L "$link_name" ]; then + echo "link with name $link_name already exists, skipping..."
+ let "n_skipped+=1" + else + echo "create link $target <- $link_name" + ln -s $target $link_name + let "n_created+=1" + fi +} + + +# Start program + +n_skipped=0 +n_created=0 + +pwd=`pwd` +out_base="output" + +# Set paths: +## run_out: target output new run directory +## run_in: source input run base directory +## module: source input module runner sub-directory +## pattern: source file pattern + +run_out="run_sp_combined_$cat" + +if [ "$cat" == "final" ]; then + + # v1 + #run_in="$pwd/$out_base/run_sp_Mc_*" + # v2 + run_in="$pwd/tile_runs/*/$out_base/run_sp_Mc_*" + + module="make_catalog_runner" + pattern="final_cat-*" + +elif [ "$cat" == "flag_tile" ]; then + + # v1 + #run_in="$pwd/$out_base/run_sp_MaMa_*/mask_runner_run_1" + # v2 + run_in="$pwd/$out_base/run_sp_tile_Ma_*" + run_out="run_sp_Ma_tile" + + module="mask_runner" + pattern="pipeline_flag-*" + +elif [ "$cat" == "flag_exp" ]; then + + # v1 + #run_in="$pwd/$out_base/run_sp_MaMa_*/mask_runner_run_2" + # v2 + run_in="$pwd/$out_base/run_sp_exp_Ma_*" + run_out="run_sp_Ma_exp" + + module="mask_runner" + pattern="pipeline_flag-*" + +elif [ "$cat" == "image" ]; then + + run_in="$pwd/$out_base/run_sp_Git_*" + module="get_images_runner" + pattern="CFIS_image-*" + +elif [ "$cat" == "psf" ]; then + + #MKDEBUG TODO: add option + # v1 + #run_in="$pwf/$out_base/run_sp_exp_Pi_*" + # v2 + run_in="$pwd/exp_runs/*/$out_base/run_sp_exp_Pi_*" + + pattern="validation_psf-*" + if [ "$psf" == "psfex" ]; then + module="psfex_interp_runner" + elif [ "$psf" == "setools" ]; then + module="setools_runner" + else + module="mccd_interp_runner" + fi + +else + + echo "Invalid catalogue type $cat" + exit 2 + +fi + + +OUTPUT="$pwd/$out_base/$run_out" +mkdir -p $OUTPUT + + +# Create links + +## target directory +outdir=$OUTPUT/$module/output +mkdir -p $outdir + +## identify source files + +# The following can result in an "Argument list too long" error +#FILES=(`find $run_in -type f -name "$pattern" -print0 | xargs -0 echo`) + +i=0 +for dir in $run_in; do + FILES=(`find $dir -type f -name "$pattern" -print0 | xargs -0 echo`) + + echo "$dir $pattern" + + ## Look over source files + for file in ${FILES[@]}; do + + target=$file + link_name=$outdir/`basename $file` + link_s $target $link_name + ((i=i+1)) + + done + +done + +#echo " $n_files target files, $i links created/skipped" +echo " $i total, "$n_skipped skipped, "$n_created links created" + +# Update log file +update_runs_log_file.py diff --git a/scripts/sh/curl_canfar_local.sh b/scripts/sh/curl_canfar_local.sh new file mode 100755 index 000000000..3c2c73658 --- /dev/null +++ b/scripts/sh/curl_canfar_local.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash + +# Global variables +SSL=~/.ssl/cadcproxy.pem +SESSION=https://ws-uv.canfar.net/skaha/v0/session +IMAGE=images.canfar.net/unions/shapepipe +NAME=shapepipe + + +# Command line arguments + +## Default values +job=-1 +ID=-1 +file_IDs=-1 +N_SMP=1 +kind=-1 +version="1.1" +cmd_remote="shapepipe/scripts/sh/init_run_exclusive_canfar.sh" +batch_max=200 +dry_run=0 + +# TODO psf + +## Help string +usage="Usage: $(basename "$0") -j JOB -[e ID |-f file_IDs] -k KIND [OPTIONS] +\n\nOptions:\n + -h\tthis message\n + -j, --job JOB\tRunning JOB, bit-coded\n + -e, --exclusive ID + \timage ID\n + -f, --file_IDs path + \tfile containing IDs\n + -p, --psf MODEL\n + \tPSF model, one in ['psfex'|'mccd'], default='$psf'\n + -k, --kind KIND\n + \timage kind, allowed are 'tile' and 'exp'\n + -N, --N_SMP N_SMOp\n + \tnumber of jobs (SMP mode only), default from original config files\n + -V, 
--version\n + \tversion of docker image, default='$version'\n + -C, --command_remote\n + \tremote command to run on canfar, default='$cmd_remote'\n + -b, --batch_max\n + \tmaximum batch size = number of jobs run simultaneously, default=$batch_max\n + -n, --dry_run LEVEL\n + \tdry run, from LEVEL=2 (no processing) to 0 (full run)\n +" + +## Help if no arguments +if [ -z $1 ]; then + echo -ne $usage + exit 1 +fi + +## Parse command line +while [ $# -gt 0 ]; do + case "$1" in + -h) + echo -ne $usage + exit 0 + ;; + -j|--job) + job="$2" + shift + ;; + -e|--exclusive) + ID="$2" + shift + ;; + -f|--file_IDs) + file_IDs="$2" + shift + ;; + -N|--N_SMP) + N_SMP="$2" + shift + ;; + -k|--kind) + kind="$2" + shift + ;; + -b|--batch_max) + batch_max="$2" + shift + ;; + -n|--dry_run) + dry_run="$2" + shift + ;; + esac + shift +done + +## Check options +if [ "$job" == "-1" ]; then + echo "No job indicated, use option -j" + exit 2 +fi + +if [ "$ID" == "-1" ] && [ "$file_IDs" == "-1" ]; then + echo "No image ID(s) indicated, use option -e ID or -f file_IDs" + exit 3 +fi + +if [ "$kind" == "-1" ]; then + echo "No image kind indicated, use option -k" + exit 4 +fi + +if [ "$dry_run" != 0 ] && [ "$dry_run" != 1 ] && [ "$dry_run" != 2 ]; then + echo "Invalid dry_run option, allowed are 0, 1, and 2" + exit 5 +fi + +# command line arguments for remote script: +# collect into string + +if [ "$dry_run" == "1" ]; then + arg_dry_run="-n $dry_run" +else + arg_dry_run="" +fi + +RESOURCES="ram=4&cores=$N_SMP" + +# TODO: dir as command line argument to this script +dir=`pwd` +#arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + + +# Return argument for local script to be called via curl +function set_arg() { + my_arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + echo $my_arg +} + + +# MKDEBUG TODO +function call_curl() { + my_arg=$1 + +} + +# Add session and image IDs to log files +function update_session_logs() { + echo $my_session >> session_IDs.txt + echo "$my_session $ID" >> session_image_IDs.txt + +} + +function submit_batch() { + path=$1 + + for ID in `cat $path`; do + my_arg=$(set_arg) + my_session=`curl -E $SSL $SESSION?$RESOURCES -d "image=$IMAGE:$version" -d "name=${NAME}" -d "cmd=$cmd_remote" --data-urlencode "args=$my_arg" &> /dev/null` + update_session_logs + done + +} + +batch=20 +sleep=300 + +((n_thresh=batch_max-batch)) + + +if [ "$dry_run" == 2 ]; then + + # Do not call curl (dry run = 2) + echo "Running command dry run:" + + if [ "$ID" == "-1" ]; then + + # Submit file (dry run = 2) + for ID in `cat $file_IDs`; do + arg=$(set_arg) + echo curl -E $SSL $SESSION?$RESOURCES -d \"image=$IMAGE:$version\" -d \"name=${NAME}\" -d \"cmd=$cmd_remote\" --data-urlencode \"args=$arg\" + done + + else + + # Submit image (dry run = 2) + arg=$(set_arg) + echo curl -E $SSL $SESSION?$RESOURCES -d \"image=$IMAGE:$version\" -d \"name=${NAME}\" -d \"cmd=$cmd_remote\" --data-urlencode \"args=$arg\" + + fi + +else + + # Call curl + rm -rf session_IDs.txt session_image_IDs.txt + + if [ "$ID" == "-1" ]; then + + # Submit file + n_jobs=`cat $file_IDs | wc -l` + if [ "$n_jobs" -gt "$batch_max" ]; then + + # Split into batches + prefix="${file_IDs}_split_" + split -d -l $batch $file_IDs $prefix + n_split=`ls -l $prefix* | wc -l` + echo "Split '$file_IDs' into $n_split batches of size $batch" + + count=1 + n_running=`stats_jobs_canfar.sh` + for batch in $prefix*; do + echo "Number of running jobs = $n_running" + echo "Submitting batch $batch ($count/$n_split)" + echo -ne "\033]0;curl patch=$patch job=$job
$count/$n_split\007" + submit_batch $batch + ((count=count+1)) + + n_running=`stats_jobs_canfar.sh` + + while [ "$n_running" -gt "$n_thresh" ]; do + echo "Wait for #jobs = $n_running jobs to go < $n_thresh ..." + sleep $sleep + n_running=`stats_jobs_canfar.sh` + done + + done + + else + + # Submit entire file (single batch) + echo "Submit '$file_IDs' in single batch" + submit_batch $file_IDs + + fi + + else + + # Submit image + arg=$(set_arg) + session=`curl -E $SSL $SESSION?$RESOURCES -d "image=$IMAGE:$version" -d "name=${NAME}" -d "cmd=$cmd_remote" --data-urlencode "args=$arg" &> /dev/null` + update_session_logs + + fi + +fi diff --git a/scripts/sh/curl_canfar_monitor_local.sh b/scripts/sh/curl_canfar_monitor_local.sh new file mode 100755 index 000000000..793d80262 --- /dev/null +++ b/scripts/sh/curl_canfar_monitor_local.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# -H "Accept-Encoding: gzip" faster? + +SSL=~/.ssl/cadcproxy.pem +SESSION=https://ws-uv.canfar.net/skaha/v0/session + +type=$1 + +echo "type=$type" + +for session_ID in `cat session_IDs.txt`; do + cmd="curl -E $SSL $SESSION/$session_ID?view=$type" + echo $cmd + $cmd +done + +exit 0 + +while [ 1 ]; do + session_ID=`tail -n 1 session_IDs.txt` + cmd="curl -E $SSL $SESSION/$session_ID?view=$type" + echo $cmd + $cmd +done diff --git a/scripts/sh/init_canfar_remote.sh b/scripts/sh/init_canfar_remote.sh new file mode 100755 index 000000000..8229aff23 --- /dev/null +++ b/scripts/sh/init_canfar_remote.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +echo "start init canfar" + +#echo init_canfar > ~/init_canfar.log +#date >> ~/init_canfar.log + +. /opt/conda/etc/profile.d/conda.sh + +source activate shapepipe + +echo "end init canfar" + diff --git a/scripts/sh/init_run_canfar.sh b/scripts/sh/init_run_canfar.sh new file mode 100755 index 000000000..1945d69fc --- /dev/null +++ b/scripts/sh/init_run_canfar.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +echo "start init canfar" + +#echo init_canfar > ~/init_canfar.log +#date >> ~/init_canfar.log + +. 
/opt/conda/etc/profile.d/conda.sh + +conda activate shapepipe + +cd cosmostat/P3_v2/psfex + +tile_ID=$1 +n_SMP=$2 +echo "tile_ID=$tile_ID n_SMP=$n_SMP" + +job_sp $tile_ID -p psfex -j 8 -n $n_SMP + +echo "end init canfar" + diff --git a/scripts/sh/init_run_exclusive_canfar.sh b/scripts/sh/init_run_exclusive_canfar.sh new file mode 100755 index 000000000..dbaac2444 --- /dev/null +++ b/scripts/sh/init_run_exclusive_canfar.sh @@ -0,0 +1,220 @@ +#!/bin/bash + +# init_run_exclusive_canfar.sh + +# Command line arguments +## Default values +job=-1 +ID=-1 +N_SMP=1 +kind=-1 +dry_run=0 +nsh_jobs=8 +dir=`pwd` +VERBOSE=1 + + +# TODO: psf + +## Help string +usage="Usage: $(basename "$0") -j JOB -e ID -k KIND [OPTIONS] +\n\nOptions:\n + -h\tthis message\n + -j, --job JOB\tRunning JOB, bit-coded\n + -e, --exclusive ID + \timage ID\n + -p, --psf MODEL\n + \tPSF model, one of ['psfex'|'mccd'], default='$psf'\n + -k, --kind KIND\n + \timage kind, allowed are 'tile' and 'exp'\n + -N, --N_SMP N_SMP\n + \tnumber of jobs (SMP mode only), default from original config files\n + -d, --directory\n + \trun directory, default is pwd ($dir)\n + -n, --dry_run\n + \tdry run, no actual processing\n +" + +## Help if no arguments +if [ -z $1 ]; then + echo -ne $usage + exit 1 +fi + +## Parse command line +while [ $# -gt 0 ]; do + case "$1" in + -h) + echo -ne $usage + exit 0 + ;; + -j|--job) + job="$2" + shift + ;; + -e|--exclusive) + ID="$2" + shift + ;; + -N|--N_SMP) + N_SMP="$2" + shift + ;; + -k|--kind) + kind="$2" + shift + ;; + -d|--directory) + dir="$2" + shift + ;; + -n|--dry_run) + dry_run=1 + ;; + esac + shift +done + +# Check options +if [ "$job" == "-1" ]; then + echo "No job indicated, use option -j" + exit 2 +fi + +if [ "$ID" == "-1" ]; then + echo "No image ID indicated, use option -e" + exit 3 +fi + +if [ "$kind" == "-1" ]; then + echo "No image kind indicated, use option -k" + exit 4 +fi + +# Functions + +## Print string, execute command, and print return value. +function command () { + cmd=$1 + dry_run=$2 + + RED='\033[0;31m' + GREEN='\033[0;32m' + NC='\033[0m' # No Color + # Color escape characters show up in log files + #RED='' + #GREEN='' + #NC='' + + if [ $VERBOSE == 1 ]; then + echo "running '$cmd' (dry run=$dry_run)" + fi + if [ "$dry_run" == "0" ]; then + $cmd + res=$? + + if [ $VERBOSE == 1 ]; then + if [ $res == 0 ]; then + echo -e "${GREEN}success, return value = $res${NC}" + else + echo -e "${RED}error, return value = $res${NC}" + if [ "$STOP" == "1" ]; then + echo -e "${RED}exiting '$(basename "$0")', error in command '$cmd'${NC}" + exit $res + else + echo -e "${RED}continuing '$(basename "$0")', error in command '$cmd'${NC}" + fi + fi + fi + fi +} + +echo "start init_run_exclusive_canfar" + +if [ "$dry_run" == 1 ]; then + echo "in dry run mode" +fi + +. /opt/conda/etc/profile.d/conda.sh + +conda activate shapepipe + +cd $dir +pwd + +if [ ! -d ${kind}_runs ]; then + command "mkdir ${kind}_runs" $dry_run +fi + + +cd ${kind}_runs + +if [ ! -d "$ID" ]; then + command "mkdir $ID" $dry_run +fi + +cd $ID +pwd + +if [ ! -d "output" ]; then + command "mkdir output" $dry_run +fi + +cd output + +if [ ! -f log_exp_headers.sqlite ]; then + command "ln -s $dir/output/log_exp_headers.sqlite" $dry_run +fi + + +# Update links to global run directories (GiFeGie, Uz, Ma?, combined_flag?)
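+# A sketch of the intended result (paths illustrative): each matching global +# run directory, e.g. $dir/output/run_sp_GitFeGie_<date>, gets a symbolic +# link of the same name in the current ${kind}_runs/$ID/output directory.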
+for run_dir in $dir/output/run_sp_*; do + command "ln -sf $run_dir" $dry_run +done + +# Update links to exposure run directories, which were created in job 32 +(( do_job= $job & 64 )) +if [[ $do_job != 0 ]]; then + if [ "$kind" == "tile" ]; then + cd ../../.. + command "link_to_exp_for_tile.py -t $ID -i tile_runs -I exp_runs" $dry_run + cd ${kind}_runs/$ID/output + + # Remove duplicate job-32 runs (tile detection) + n_32=`ls -rt1d run_sp_tile_Sx_* | wc -l` + if [ "$n_32" != "1" ]; then + n_remove="$(($n_32-1))" + echo "removing $n_remove duplicate old job-32 runs" + rm -rf `ls -rt1d run_sp_tile_Sx_* | head -$n_remove` + fi + + # Remove previous runs of this job + rm -rf run_sp_tile_PsViSmVi* + fi +fi + +(( do_job= $job & 256 )) +if [[ $do_job != 0 ]]; then + + # Remove previous runs of this job + rm -rf run_sp_Ms_20??_* + rm -rf run_sp_Mc_20??_* + +fi + +cd .. + +# Update log file +command update_runs_log_file.py $dry_run + +echo -n "pwd: " +pwd + +echo -n "environment: " +echo $CONDA_PREFIX + +command "job_sp_canfar.bash -p psfex -j $job -e $ID --n_smp $N_SMP" $dry_run + +cd $dir + +echo "end init_run_exclusive_canfar" diff --git a/scripts/sh/job_sp.bash b/scripts/sh/job_sp.bash index 15cb1428a..2a207d807 100755 --- a/scripts/sh/job_sp.bash +++ b/scripts/sh/job_sp.bash @@ -8,11 +8,6 @@ # called in interactive mode on a virtual # machine. # Author: Martin Kilbinger -# Date: v1.0 11/2020 -# v1.1 01/2021 - -# MKDEBUG TODO: -# Option to change SMP_BATCH_SIZE, not for MPI # VM home, required for canfar run. @@ -294,23 +289,6 @@ function upload_logs() { upload "logs" "$id" "$verbose" "${upl[@]}" } -# Print script variables -function print_env() { - echo "*** Environment ***" - echo "Data:" - echo " TILE_ARR=${TILE_ARR[@]}" - echo "Paths:" - echo " VM_HOME=$VM_HOME" - echo " SP_RUN=$SP_RUN" - echo " TILE_NUMBERS_PATH=$TILE_NUMBERS_PATH" - echo " OUTPUT=$OUTPUT" - echo " SP_CONFIG=$SP_CONFIG" - echo "Other variables:" - echo " VCP=$VCP" - echo " CERTFILE=$CERTFILE" - echo "***" -} - function set_config_n_smp() { local config_name=$1 local _n_smp=$2 @@ -420,7 +398,7 @@ if [[ $do_job != 0 ]]; then ### Star detection, selection, PSF model. setools can exit with an error for CCD with insufficient stars, ### the script should continue STOP=0 - command_sp "shapepipe_run -c $SP_CONFIG/config_tile_Sx_exp_${psf}.ini" "Run shapepipe (tile detection, exp $psf)" + command_cfg_shapepipe "config_tile_Sx_exp_${psf}.ini" "Run shapepipe (tile detection, exp $psf)" $n_smp STOP=1 fi diff --git a/scripts/sh/job_sp_canfar.bash b/scripts/sh/job_sp_canfar.bash new file mode 100755 index 000000000..3ba748c69 --- /dev/null +++ b/scripts/sh/job_sp_canfar.bash @@ -0,0 +1,514 @@ +#!/usr/bin/env bash + +# Name: job_sp_canfar.bash +# Description: General script to process one or more tiles +# with all contributing exposures. +# This works as a job submission script for +# the canfar batch system, or can be +# called in interactive mode on a virtual +# machine.
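+# Example call (illustrative): mask tiles and exposures +# (job bits 4 + 8 = 12) with the PSFEx model, using 8 cores: +# job_sp_canfar.bash -p psfex -j 12 -n 8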
+# Author: Martin Kilbinger + + +# Command line arguments +## Default values +job=255 +config_dir=$HOME/shapepipe/example/cfis +psf='mccd' +retrieve='vos' +star_cat_for_mask='onthefly' +exclusive='' +results='cosmostat/kilbinger/results_v2' +n_smp=-1 +nsh_jobs=8 + +## Help string +usage="Usage: $(basename "$0") [OPTIONS] [TILE_ID] +\n\nOptions:\n + -h\tthis message\n + -j, --job JOB\tRunning JOB, bit-coded\n + \t 1: retrieve images (online if method=vos)\n + \t 2: prepare images (offline)\n + \t 4: mask tiles (online if star_cat_for_mask=onthefly)\n + \t 8: mask exposures (online if star_cat_for_mask=onthefly)\n + \t 16: detection of galaxies on tiles (offline)\n + \t 32: processing of stars on exposures (offline)\n + \t 64: galaxy selection on tiles (offline)\n + \t 128: shapes and morphology (offline)\n + \t 256: paste catalogues (offline)\n + -c, --config_dir DIR\n + \t config file directory, default='$config_dir'\n + -p, --psf MODEL\n + \tPSF model, one of ['psfex'|'mccd'], default='$psf'\n + -r, --retrieve METHOD\n + \tmethod to retrieve images, allowed are 'vos', 'symlink', default='$retrieve'\n + -s, --star_cat_for_mask\n + \tcatalogue for masking bright stars, allowed are 'onthefly', 'save',\n + \tdefault is '${star_cat_for_mask}'\n + -e, --exclusive ID\n + \texclusive input file number string ID (default: None)\n + -o, --output_dir\n + \toutput (upload) directory on vos:cfis, default='$results'\n + -n, --n_smp N_SMP\n + \tnumber of jobs (SMP mode only), default from original config files\n + --nsh_jobs NJOB\n + \tnumber of objects per parallel shape module call, \n + \tdefault: optimal number is computed\n + TILE_ID_i\n + \ttile ID(s), e.g. 283.247 214.242\n +" + +## Help if no arguments +if [ -z $1 ]; then + echo -ne $usage + exit 1 +fi + +## Parse command line +while [ $# -gt 0 ]; do + case "$1" in + -h) + echo -ne $usage + exit 0 + ;; + -j|--job) + job="$2" + shift + ;; + -c|--config_dir) + config_dir="$2" + shift + ;; + -p|--psf) + psf="$2" + shift + ;; + -r|--retrieve) + retrieve="$2" + shift + ;; + -s|--star_cat_for_mask) + star_cat_for_mask="$2" + shift + ;; + -e|--exclusive) + exclusive="$2" + shift + ;; + -o|--output_dir) + results="$2" + shift + ;; + -n|--n_smp) + n_smp="$2" + shift + ;; + --nsh_jobs) + nsh_jobs="$2" + shift + ;; + esac + shift +done + +## Check options +if [ "$psf" != "psfex" ] && [ "$psf" != "mccd" ]; then + echo "PSF (option -p) needs to be 'psfex' or 'mccd'" + exit 2 +fi + +if [ "$star_cat_for_mask" != "onthefly" ] && [ "$star_cat_for_mask" != "save" ]; then + echo "Star cat for mask (option -s) needs to be 'onthefly' or 'save'" + exit 4 +fi + +if [ "$retrieve" != "vos" ] && [ "$retrieve" != "symlink" ]; then + echo "method to retrieve images (option -r) needs to be 'vos' or 'symlink'" + exit 5 +fi + +# For tar archives. TODO: Should be unique to each job +export ID="test" + +## Paths + +## Path variables used in shapepipe config files + +# Run path and location of input image directories +export SP_RUN=`pwd` + +# Config file path +export SP_CONFIG=$SP_RUN/cfis +export SP_CONFIG_MOD=$SP_RUN/cfis_mod + +## Other variables + +# Output +OUTPUT=$SP_RUN/output + +# For tar archives +output_rel=`realpath --relative-to=. $OUTPUT` + +# Stop on error, default=1 +STOP=1 + +# Verbose mode (1: verbose, 0: quiet) +VERBOSE=1 + +# VCP options +export CERTFILE=$HOME/.ssl/cadcproxy.pem +export VCP="vcp --certfile=$CERTFILE" + + +## Functions + +# Print string, execute command, and print return value.
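+# Example call (illustrative): +# command "mkdir -p output" "Create output directory" +# prints the description and command, executes it, and reports the +# return value in green (success) or red (error).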
+function command () {
+   cmd=$1
+   str=$2
+
+   RED='\033[0;31m'
+   GREEN='\033[0;32m'
+   NC='\033[0m' # No Color
+   # Color escape characters show up in log files
+   #RED=''
+   #GREEN=''
+   #NC=''
+
+
+   if [ $# == 2 ]; then
+      if [ $VERBOSE == 1 ]; then
+         echo "$str: running '$cmd'"
+      fi
+      $cmd
+   else
+      if [ $VERBOSE == 1 ]; then
+         echo "$str: running '$cmd $4 \"$5 $6\"'"
+      fi
+      $cmd $4 "$5 $6"
+   fi
+   res=$?
+
+   if [ $VERBOSE == 1 ]; then
+      if [ $res == 0 ]; then
+         echo -e "${GREEN}success, return value = $res${NC}"
+      else
+         echo -e "${RED}error, return value = $res${NC}"
+         if [ $STOP == 1 ]; then
+            echo -e "${RED}exiting 'job_sp_canfar.bash', error in command '$cmd'${NC}"
+            exit $res
+         else
+            echo -e "${RED}continuing 'job_sp_canfar.bash', error in command '$cmd'${NC}"
+         fi
+      fi
+   fi
+}
+
+# Run shapepipe command. If error occurs, upload sp log files before stopping script.
+function command_sp() {
+   local cmd=$1
+   local str=$2
+
+   command "$1" "$2"
+}
+
+# Set up config file and call shapepipe_run
+function command_cfg_shapepipe() {
+    local config_name=$1
+    local str=$2
+    local _n_smp=$3
+    local _exclusive=$4
+
+    if [ "$_exclusive" != "" ]; then
+        exclusive_flag="-e $_exclusive"
+    else
+        exclusive_flag=""
+    fi
+
+    config_upd=$(set_config_n_smp $config_name $_n_smp)
+    local cmd="shapepipe_run -c $config_upd $exclusive_flag"
+    command_sp "$cmd" "$str"
+}
+
+# Tar and upload files to vos
+function upload() {
+   base=$1
+   shift
+   ID=$1
+   shift
+   verbose=$1
+   shift
+   upl=("$@")
+
+   echo "Counting upload files"
+   n_upl=(`ls -l ${upl[@]} | wc`)
+   if [ $n_upl == 0 ]; then
+      if [ $STOP == 1 ]; then
+         echo "Exiting script, no file found for '$base' tar ball"
+         exit 3
+      fi
+   fi
+   tar czf ${base}_${ID}.tgz ${upl[@]}
+   command "$VCP ${base}_${ID}.tgz vos:cfis/$results" "Upload tar ball"
+}
+
+# Upload log files
+function upload_logs() {
+   id=$1
+   verbose=$2
+
+   upl="$output_rel/*/*/logs $output_rel/*/logs"
+   upload "logs" "$id" "$verbose" "${upl[@]}"
+}
+
+function set_config_n_smp() {
+    local config_name=$1
+    local _n_smp=$2
+
+    local config_orig="$SP_CONFIG/$config_name"
+
+    if [[ $_n_smp != -1 ]]; then
+      # Update SMP batch size
+      local config_upd="$SP_CONFIG_MOD/$config_name"
+      update_config $config_orig $config_upd "SMP_BATCH_SIZE" $_n_smp
+    else
+      # Keep original config file
+      local config_upd=$config_orig
+    fi
+
+    # Set "return" value (stdout)
+    echo "$config_upd"
+}
+
+# Update config file
+function update_config() {
+    local config_orig=$1
+    local config_upd=$2
+    local key=$3
+    local val_upd=$4
+
+    cat $config_orig \
+      | perl -ane 's/'$key'\s+=.+/'$key' = '$val_upd'/; print' > $config_upd
+}
+
+### Start ###
+
+echo "Start processing"
+
+# Create input and output directories
+mkdir -p $SP_RUN
+cd $SP_RUN
+mkdir -p $OUTPUT
+mkdir -p $SP_CONFIG_MOD
+
+# Processing
+
+### Retrieve config files
+if [[ $config_dir == *"vos:"* ]]; then
+  command_sp "$VCP $config_dir ." "Retrieve shapepipe config files"
+else
+  if [[ ! -L cfis ]]; then
+    command_sp "ln -s $config_dir cfis" "Retrieve shapepipe config files"
+  fi
+fi
+
+
+## Retrieve config files and images (online if retrieve=vos)
+## Retrieve and save star catalogues for masking (if star_cat_for_mask=save)
+(( do_job= $job & 1 ))
+if [[ $do_job != 0 ]]; then
+
+  ### Retrieve files
+  command_cfg_shapepipe \
+    "config_GitFeGie_$retrieve.ini" \
+    "Retrieve images" \
+    -1 \
+    $exclusive
+
+  #if [[ !
-d "data_tiles" ]]; then + #echo "Directory or link 'data_tiles' does not exist, exiting" + #exit 1 + #fi + #command_cfg_shapepipe "config_Git_vos.ini" "Retrieve tiles" -1 $n_exclusive + + ### Retrieve and save star catalogues for masking + if [ "$star_cat_for_mask" == "save" ]; then + #### For tiles + mkdir $SP_RUN/star_cat_tiles + command_sp \ + "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_1/output $SP_RUN/star_cat_tiles" \ + "Save star cats for masking (tile)" + + #### For single-exposures + mkdir $SP_RUN/star_cat_exp + command_sp \ + "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_2/output $SP_RUN/star_cat_exp exp" \ + "Save star cats for masking (exp)" + fi + +fi + +## Prepare images (offline) +(( do_job= $job & 2 )) +if [[ $do_job != 0 ]]; then + + ### Uncompress tile weights + command_cfg_shapepipe "config_tile_Uz.ini" "Run shapepipe (uncompress tile weights)" $n_smp $exclusive + + ### Split images into single-HDU files, merge headers for WCS info + command_cfg_shapepipe \ + "config_exp_SpMh.ini" \ + "Run shapepipe (split images, merge headers)" \ + $n_smp \ + $exclusive + +fi + +## Mask tiles: add star, halo, and Messier object masks (online if "star_cat_for_mask" is "onthefly") +(( do_job= $job & 4 )) +if [[ $do_job != 0 ]]; then + + ### Mask tiles + command_cfg_shapepipe \ + "config_tile_Ma_$star_cat_for_mask.ini" \ + "Run shapepipe (mask tiles)" \ + $n_smp \ + $exclusive + +fi + +## Mask exposures: add star, halo, and Messier object masks (online if "star_cat_for_mask" is "onthefly") +(( do_job= $job & 8 )) +if [[ $do_job != 0 ]]; then + + ### Mask exposures + command_cfg_shapepipe \ + "config_exp_Ma_$star_cat_for_mask.ini" \ + "Run shapepipe (mask exposures)" \ + $n_smp \ + $exclusive + +fi + + +## Remaining exposure processing (offline) +(( do_job= $job & 16 )) +if [[ $do_job != 0 ]]; then + + ### Object detection on tiles + command_cfg_shapepipe \ + "config_tile_Sx.ini" \ + "Run shapepipe (tile detection)" \ + $n_smp \ + $exclusive + +fi + +## Exposure processing (offline) +(( do_job= $job & 32 )) +if [[ $do_job != 0 ]]; then + + ### Star detection, selection, PSF model. 
setools can exit with an error for CCD with insufficient stars,
+  ### the script should continue
+  STOP=0
+  command_cfg_shapepipe \
+    "config_exp_${psf}.ini" \
+    "Run shapepipe (exp $psf)" \
+    $n_smp \
+    $exclusive
+  STOP=1
+
+fi
+
+## Process tiles up to shape measurement
+(( do_job= $job & 64 ))
+if [[ $do_job != 0 ]]; then
+
+  ### PSF model letter: 'P' (psfex) or 'M' (mccd)
+  letter=${psf:0:1}
+  Letter=${letter^}
+  command_cfg_shapepipe \
+    "config_tile_${Letter}iViSmVi_canfar.ini" \
+    "Run shapepipe (tile PsfInterp=$Letter: up to ngmix+galsim)" \
+    $n_smp \
+    $exclusive
+
+fi
+
+## Shape measurement (offline)
+(( do_job= $job & 128 ))
+if [[ $do_job != 0 ]]; then
+
+  ### Prepare config files
+  n_min=0
+  n_obj=`get_number_objects`
+  nsh_step=`echo "$(($n_obj/$nsh_jobs))"`
+
+  n_max=$((nsh_step - 1))
+  for k in $(seq 1 $nsh_jobs); do
+    cat $SP_CONFIG/config_tile_Ng_template.ini | \
+      perl -ane \
+        's/(ID_OBJ_MIN =) X/$1 '$n_min'/; s/(ID_OBJ_MAX =) X/$1 '$n_max'/; s/NgXu/Ng'$k'u/; s/X_interp/'$psf'_interp/g; print' \
+        > $SP_CONFIG_MOD/config_tile_Ng${k}u.ini
+    n_min=$((n_min + nsh_step))
+    if [ "$k" == $((nsh_jobs - 1)) ]; then
+      n_max=-1
+    else
+      n_max=$((n_min + nsh_step - 1))
+    fi
+  done
+
+  ### Shapes, run $nsh_jobs parallel processes
+  VERBOSE=0
+  for k in $(seq 1 $nsh_jobs); do
+
+    # if output dir for subrun exists but no output: re-run
+    ngmix_run=$OUTPUT/"run_sp_tile_ngmix_Ng${k}u/ngmix_runner"
+    if [ -e "$ngmix_run" ]; then
+      ngmix_out="$ngmix_run/output"
+      n_out=`ls -rlt $ngmix_out | wc -l`
+      if [ "$n_out" -lt 2 ]; then
+        command \
+          "rm -rf $OUTPUT/run_sp_tile_ngmix_Ng${k}u" \
+          "Re-running existing empty ngmix subrun $k"
+        command_sp \
+          "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" \
+          "Run shapepipe (tile: ngmix $k)" &
+      else
+        echo "Skipping existing non-empty ngmix subrun $k"
+      fi
+    else
+      command_sp \
+        "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" \
+        "Run shapepipe (tile: ngmix $k)" &
+    fi
+  done
+  wait
+  VERBOSE=1
+
+fi
+
+## Create final catalogues (offline)
+(( do_job= $job & 256 ))
+if [[ $do_job != 0 ]]; then
+
+  cat $SP_CONFIG/config_merge_sep_cats_template.ini | \
+    perl -ane \
+      's/(N_SPLIT_MAX =) X/$1 '$nsh_jobs'/; print' \
+      > $SP_CONFIG_MOD/config_merge_sep_cats.ini
+
+  ### Merge separated shapes catalogues
+  command_sp \
+    "shapepipe_run -c $SP_CONFIG_MOD/config_merge_sep_cats.ini" \
+    "Run shapepipe (tile: merge sep cats)" \
+    "$VERBOSE" \
+    "$ID"
+
+  ### Merge all relevant information into final catalogue
+  command_sp \
+    "shapepipe_run -c $SP_CONFIG/config_make_cat_$psf.ini" \
+    "Run shapepipe (tile: create final cat $psf)" \
+    "$VERBOSE" \
+    "$ID"
+
+fi
diff --git a/scripts/sh/missing_unique.sh b/scripts/sh/missing_unique.sh
new file mode 100644
index 000000000..8820d26f5
--- /dev/null
+++ b/scripts/sh/missing_unique.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+FILES=("summary/missing_job_128_ngmix_runner_*.txt")
+temp="temp_temp.tmp"
+temp2="temp_temp2.tmp"
+out="missing_job_128_ngmix_runner_cut.txt"
+
+i=0
+for file in ${FILES[@]}; do
+
+  echo $file $i
+
+  if [ "$i" == "0" ]; then
+    cp $file $temp
+  else
+    comm -12 <(sort $file) <(sort $temp) > $temp2
+    cp $temp2 $temp
+  fi
+
+  wc $file $temp
+
+  ((i=i+1))
+
+done
+
+mv $temp $out
+rm -f $temp2
diff --git a/scripts/sh/post_proc_sp.bash b/scripts/sh/post_proc_sp.bash
index 72c620e68..930d5b79d 100755
--- a/scripts/sh/post_proc_sp.bash
+++ b/scripts/sh/post_proc_sp.bash
@@ -70,7 +70,7 @@ SP_CONFIG=$SP_BASE/example/cfis
 # PSF
 
 ## Collect all psfinterp results
-prepare_star_cat -p $psf
+combine_runs -p $psf -t psf ## Merge all psfinterp results and compute PSF residuals shapepipe_run -c $SP_CONFIG/config_MsPl_$psf.ini @@ -79,7 +79,7 @@ shapepipe_run -c $SP_CONFIG/config_MsPl_$psf.ini # Galaxies ## Prepare output directory with links to all 'final_cat' result files -prepare_tiles_for_final +combine_runs ## Merge final output files to single mother catalog input_final=output/run_sp_combined/make_catalog_runner/output diff --git a/scripts/sh/prepare_star_cat.bash b/scripts/sh/prepare_star_cat.bash deleted file mode 100755 index ff8cb167f..000000000 --- a/scripts/sh/prepare_star_cat.bash +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash - -# Name: prepare_star_cat.bash -# Description: Create directory and links to all PSF or star catalogue files -# from previous ShapePipe runs. -# Author: Martin Kilbinger - - -# Command line arguments - -## Default values -psf='mccd' - -## Help string -usage="Usage: $(basename "$0") [OPTIONS] -\n\nOptions:\n - -h\tthis message\n - -p, --psf MODEL\n - \tPSF model, one in ['psfex'|'mccd'|'setools'], default='$psf'\n -" - -## Parse command line -while [ $# -gt 0 ]; do - case "$1" in - -h) - echo -ne $usage - exit 0 - ;; - -p|--psf) - psf="$2" - shift - ;; - *) - echo -ne usage - exit 1 - ;; - esac - shift -done - - -## Path variables -if [ "$psf" == "psfex" ] || [ "$psf" == "mccd" ]; then - psfval_file_base="validation_psf" - dir_individual="psf_validation_ind" -else - psfval_file_base="mask/star_selection" - dir_individual="star_all_ind" -fi - -pwd=`pwd` - - -## Functions -function link_s () { - target=$1 - link_name=$2 - - if [ -L "$link_name" ]; then - let "n_skipped+=1" - else - ln -s $target $link_name - let "n_created+=1" - fi - - return $n -} - - -# Create output dirs -if [ ! -d "$dir_individual" ]; then - mkdir -p $dir_individual -fi - -if [ "$psf" == "psfex" ]; then - runner="psfex_interp_runner" -elif [ "$psf" == "mccd" ]; then - runner="mccd_fit_val_runner" -else - runner="setools_runner" -fi - -# Find all psf validation files and create links. -# Assumes untar_results.sh has been run before. -n_skipped=0 -n_created=0 -FILES=output/*/${runner}/output/${psfval_file_base}* -for val in ${FILES[@]}; do - base=`basename $val` - link_s "$pwd/$val" "$dir_individual/$base" -done -echo " Created $n_created links, skipped $n_skipped files" diff --git a/scripts/sh/prepare_tiles_for_final.bash b/scripts/sh/prepare_tiles_for_final.bash deleted file mode 100755 index c26f8e8c2..000000000 --- a/scripts/sh/prepare_tiles_for_final.bash +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env bash - -# Name: prepare_tiles_for_final.bash -# Description: Create shapepipe run directory with -# links to all `final_cat` fits files -# Author: Martin Kilbinger -# Date: 06/2020 -# Version: 0.1 - -# Command line arguments - -## Default values -cat='final' - -## Help string -usage="Usage: $(basename "$0") [OPTIONS] -\n\nOptions:\n - -h\tthis message\n - -c, --cat TYPE\n - \tCatalogue type, one in ['final'|'flag'|'image'], default='$cat'\n -" - -## Parse command line -while [ $# -gt 0 ]; do - case "$1" in - -h) - echo -ne $usage - exit 0 - ;; - -c|--cat) - cat="$2" - shift - ;; - *) - echo -ne $usage - exit 1 - ;; - esac - shift -done - - -## Functions -function link_s () { - target=$1 - link_name=$2 - - if [ -e "$link_name" ]; then - echo "link with name $link_name already exists, skipping..." 
-    else
-        echo "create link $target <- $link_name"
-        ln -s $target $link_name
-    fi
-}
-
-## Check options
-if [ "$cat" != "final" ] && [ "$cat" != "flag" ] && [ "$cat" != "image" ]; then
-    echo "cat (option -c) needs to be 'final', 'flag', or 'image'"
-    exit 2
-fi
-
-
-### Start ###
-
-pwd=`pwd`
-out_base="output"
-
-if [ "$cat" == "final" ]; then
-    run_dir="run_sp_combined"
-    INPUT="$pwd/$out_base/run_sp_Mc_*"
-elif [ "$cat" == "flag" ]; then
-    run_dir="run_sp_combined_flag"
-    INPUT="$pwd/$out_base/run_sp_tile_Ma_*"
-else
-    run_dir="run_sp_combined_image"
-    INPUT="$pwd/$out_base/run_sp_Git_*"
-fi
-
-log_path="$pwd/$out_base/log_run_sp.txt"
-OUTPUT="$pwd/$out_base/$run_dir"
-mkdir -p $OUTPUT
-
-# Directories and file patterns to create/link
-if [ "$cat" == "final" ]; then
-    DIRS=(
-        "make_catalog_runner"
-    )
-    PATTERNS=(
-        "final_cat-*"
-    )
-elif [ "$cat" == "flag" ]; then
-    DIRS=(
-        "mask_runner"
-    )
-    PATTERNS=(
-        "pipeline_flag-*"
-    )
-else
-    DIRS=(
-        "get_images_runner"
-    )
-    PATTERNS=(
-        "CFIS_image-*"
-    )
-fi
-
-# Create links
-for n in "${!PATTERNS[@]}"; do
-    pattern=${PATTERNS[$n]}
-    dir=$OUTPUT/${DIRS[$n]}/output
-    echo $n $pattern $dir
-    mkdir -p $dir
-    FILES=(`find $INPUT -name "$pattern"`)
-    n_files=${#FILES[@]}
-    i=0
-    for file in ${FILES[@]}; do
-        target=$file
-        link_name=$dir/`basename $file`
-        link_s $target $link_name
-        ((i=i+1))
-    done
-    echo "  $n_files target files, $i links created/skipped"
-done
-
-# Update log file
-modules=`echo ${DIRS[@]} | tr ' ' ,`
-echo "./$out_base/$run_dir $modules" >> $log_path
diff --git a/scripts/sh/stats_jobs_canfar.sh b/scripts/sh/stats_jobs_canfar.sh
new file mode 100755
index 000000000..3c1332f59
--- /dev/null
+++ b/scripts/sh/stats_jobs_canfar.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+# Name: stats_jobs_canfar.sh
+# Author: Martin Kilbinger
+# Description: Handles headless jobs on canfar
+
+
+# Global variables
+
+## Temporary files
+tmpfile_jobs="jobinfo.txt"
+tmpfile_ids="ids.txt"
+
+## curl options
+SSL=~/.ssl/cadcproxy.pem
+SESSION=https://ws-uv.canfar.net/skaha/v0/session
+
+
+# Command line arguments
+
+## Default values
+mode="count"
+
+## Help string
+usage="Usage: $(basename "$0") [OPTIONS]
+\n\nOptions:\n
+   -h\tthis message\n
+   -m, --mode MODE\n
+   \tmode, allowed are 'count' (default), 'delete'\n
+"
+
+## Parse command line
+while [ $# -gt 0 ]; do
+  case "$1" in
+    -h)
+      echo -ne $usage
+      exit 0
+      ;;
+    -m|--mode)
+      mode="$2"
+      shift
+      ;;
+  esac
+  shift
+done
+
+## Check options
+case $mode in
+  "count"|"delete")
+    # valid option
+    ;;
+  *)
+    echo "Invalid mode $mode"
+    exit 1
+    ;;
+esac
+
+
+# Main program
+
+# Get all instances
+curl -E $SSL $SESSION 2> /dev/null > $tmpfile_jobs
+
+# Get headless job IDs
+cat $tmpfile_jobs | grep headless -B 4 -A 12 | grep \"id | perl -F\" -ane 'print "$F[3]\n"' > $tmpfile_ids
+
+# Number of jobs
+n_headless=`cat $tmpfile_ids | wc -l`
+
+if [ "$mode" == "count" ]; then
+
+  echo $n_headless
+
+elif [ "$mode" == "delete" ]; then
+
+  echo -n "Delete $n_headless jobs?
[y|n] " + read answer + if [ "$answer" == "y" ]; then + for ID in `cat $tmpfile_ids`; do + echo $ID + # Delete headless jobs + #curl -X DELETE -E $SSL $SESSION/$ID + done + fi + +fi + + +# Remove temporary files +rm -f $tmpfile_jobs $tmpfile_ids diff --git a/shapepipe/modules/make_cat_package/make_cat.py b/shapepipe/modules/make_cat_package/make_cat.py index 6bbe26648..4e30e3552 100644 --- a/shapepipe/modules/make_cat_package/make_cat.py +++ b/shapepipe/modules/make_cat_package/make_cat.py @@ -16,7 +16,7 @@ from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utitities import galaxy +from shapepipe.utilities import galaxy def prepare_final_cat_file(output_path, file_number_string): diff --git a/shapepipe/modules/mask_package/__init__.py b/shapepipe/modules/mask_package/__init__.py index af2dc5c5c..ccd55fa9a 100644 --- a/shapepipe/modules/mask_package/__init__.py +++ b/shapepipe/modules/mask_package/__init__.py @@ -63,6 +63,9 @@ Prefix to be appended to output file name ``flag``; helps to distinguish the file patterns of newly created and external mask files +CHECK_EXISTING_DIR : str, optional + If given, search this directory for existing mask files; the + corresponding images will then not be processed Mask config file ================ diff --git a/shapepipe/modules/mask_package/mask.py b/shapepipe/modules/mask_package/mask.py index 69a1c3c97..fc40d1f30 100644 --- a/shapepipe/modules/mask_package/mask.py +++ b/shapepipe/modules/mask_package/mask.py @@ -45,6 +45,8 @@ class Mask(object): Path to external flag file, default is ``None`` (not used) outname_base : str, optional Output file name base, default is ``flag`` + check_existing_dir : str, optional + If not ``None`` (default), search path for existing mask files star_cat_path : str, optional Path to external star catalogue, default is ``None`` (not used; instead the star catalogue is produced on the fly at run time) @@ -64,6 +66,7 @@ def __init__( w_log, path_external_flag=None, outname_base='flag', + check_existing_dir=None, star_cat_path=None, hdu=0, ): @@ -98,6 +101,9 @@ def __init__( # Output file base name self._outname_base = outname_base + # Search path for existing mask files + self._check_existing_dir = check_existing_dir + # Set external star catalogue path if given if star_cat_path is not None: self._star_cat_path = star_cat_path @@ -113,6 +119,7 @@ def __init__( # Set error flag self._err = False + def _get_config(self): """Get Config. @@ -307,6 +314,16 @@ def make_mask(self): Main function to create the mask. 
""" + output_file_name = ( + f'{self._img_prefix}' + + f'{self._outname_base}{self._img_number}.fits' + ) + if ( + os.path.exists(f"{self._check_existing_dir}//{output_file_name}") + ): + print("MKDEBUG skipping ", output_file_name) + return None, None + if self._config['MD']['make']: self.missing_data() diff --git a/shapepipe/modules/mask_runner.py b/shapepipe/modules/mask_runner.py index 3683076e4..c3215107c 100644 --- a/shapepipe/modules/mask_runner.py +++ b/shapepipe/modules/mask_runner.py @@ -91,6 +91,15 @@ def mask_runner( outname_base = 'flag' + # Path to check for already created mask files + if config.has_option(module_config_sec, 'CHECK_EXISTING_DIR'): + check_existing_dir = config.getexpanded( + module_config_sec, + 'CHECK_EXISTING_DIR' + ) + else: + check_existing_dir = None + # Create instance of Mask mask_inst = Mask( *input_file_list[:2], @@ -101,6 +110,7 @@ def mask_runner( path_external_flag=ext_flag_name, outname_base=outname_base, star_cat_path=ext_star_cat, + check_existing_dir=check_existing_dir, hdu=hdu, w_log=w_log, ) diff --git a/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/shapepipe/modules/mccd_package/mccd_interpolation_script.py index 40e644e8b..c899b2b8f 100644 --- a/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -435,8 +435,6 @@ def _interpolate_me(self): ).T self.interp_PSFs = interp_MCCD(mccd_model_path, gal_pos, ccd) - # self.interp_PSFs = interpsfex( - # dot_psf_path, gal_pos, self._star_thresh, self._chi2_thresh) if ( isinstance(self.interp_PSFs, str) diff --git a/shapepipe/modules/merge_sep_cats_package/merge_sep_cats.py b/shapepipe/modules/merge_sep_cats_package/merge_sep_cats.py index 85f7e194f..4c82d1a8c 100644 --- a/shapepipe/modules/merge_sep_cats_package/merge_sep_cats.py +++ b/shapepipe/modules/merge_sep_cats_package/merge_sep_cats.py @@ -87,6 +87,22 @@ def process(self): cat0.open() list_ext_name = cat0.get_ext_name() list_col_name = cat0.get_col_names() + + + # Inupt ngmix files sometimes have not all sheared versions + # (HDUs 1 - 5 = 1M, 1P, 2M, 2P, NOSHEAR) due to IO errors + if len(list_ext_name) < 6: + raise IndexError( + f"Input ngmix catalogue {input_file} has only" + + f" {len(list_ext_name)} HDUs, required are 6" + ) + + # MKDEBUG: Some input ngmix catalogues have multiple of 5 HDUs + # if reprocessed and not deleted but appended + if len(list_ext_name) > 6: + wmsg = f"Cropping input HDUs from {len(list_ext_name)} to 5" + self._w_log.info(wmsg) + list_ext_name = list_ext_name[:6] cat0.close() # Create empty dictionary diff --git a/shapepipe/modules/psfex_interp_package/psfex_interp.py b/shapepipe/modules/psfex_interp_package/psfex_interp.py index 3ac1b25ea..74064c296 100644 --- a/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -2,7 +2,7 @@ This module computes the PSFs from a PSFEx model at several galaxy positions. -:Authors: Morgan Schmitz and Axel Guinot +:Authors: Morgan Schmitz, Axel Guinot, Martin Kilbinger """ @@ -29,82 +29,6 @@ FILE_NOT_FOUND = 'File_not_found' -def interpsfex(dotpsfpath, pos, thresh_star, thresh_chi2): - """Interpolate PSFEx. - - Use PSFEx generated model to perform spatial PSF interpolation. 
- - Parameters - ---------- - dotpsfpath : str - Path to ``.psf`` file (PSFEx output) - pos : numpy.ndarray - Positions where the PSF model should be evaluated - thresh_star : int - Threshold of stars under which the PSF is not interpolated - thresh_chi2 : int - Threshold for chi squared - - Returns - ------- - numpy.ndarray - Array of PSFs, each row is the PSF image at the corresponding position - requested - - """ - if not os.path.exists(dotpsfpath): - return FILE_NOT_FOUND - - # read PSF model and extract basis and polynomial degree and scale position - PSF_model = fits.open(dotpsfpath)[1] - - # Check number of stars used to compute the PSF - if PSF_model.header['ACCEPTED'] < thresh_star: - return NOT_ENOUGH_STARS - if PSF_model.header['CHI2'] > thresh_chi2: - return BAD_CHI2 - - PSF_basis = np.array(PSF_model.data)[0][0] - try: - deg = PSF_model.header['POLDEG1'] - except KeyError: - # constant PSF model - return PSF_basis[0, :, :] - - # scale coordinates - x_interp, x_scale = ( - PSF_model.header['POLZERO1'], - PSF_model.header['POLSCAL1'] - ) - y_interp, y_scale = ( - PSF_model.header['POLZERO2'], - PSF_model.header['POLSCAL2'] - - ) - xs, ys = (pos[:, 0] - x_interp) / x_scale, (pos[:, 1] - y_interp) / y_scale - - # compute polynomial coefficients - coeffs = np.array([[x ** idx for idx in range(deg + 1)] for x in xs]) - cross_coeffs = np.array([ - np.concatenate([ - [(x ** idx_j) * (y ** idx_i) for idx_j in range(deg - idx_i + 1)] - for idx_i in range(1, deg + 1) - ]) - for x, y in zip(xs, ys) - ]) - coeffs = np.hstack((coeffs, cross_coeffs)) - - # compute interpolated PSF - PSFs = np.array([ - np.sum( - [coeff * atom for coeff, atom in zip(coeffs_posi, PSF_basis)], - axis=0, - ) - for coeffs_posi in coeffs - ]) - - return PSFs - class PSFExInterpolator(object): """The PSFEx Interpolator Class. @@ -115,7 +39,7 @@ class PSFExInterpolator(object): Parameters ---------- dotpsf_path : str - Path to PSFEx output file + Path to PSFEx output file; can be `None` in multi-epoch mode galcat_path : str Path to SExtractor-like galaxy catalogue output_path : str @@ -151,18 +75,19 @@ def __init__( ): # Path to PSFEx output file + self._dotpsf_path = dotpsf_path if ( - isinstance(dotpsf_path, type(None)) - or os.path.isfile(dotpsf_path) + not isinstance(dotpsf_path, type(None)) + and not os.path.isfile(dotpsf_path) ): - self._dotpsf_path = dotpsf_path - else: raise ValueError(f'Cound not find file {dotpsf_path}.') + # Path to catalogue containing galaxy positions if os.path.isfile(galcat_path): self._galcat_path = galcat_path else: raise ValueError(f'Cound not find file {galcat_path}.') + # Path to output file to be written self._output_path = output_path + '/galaxy_psf' # Path to output file to be written for validation @@ -280,6 +205,88 @@ def _get_galaxy_positions(self): raise KeyError(pos_param_err) galcat.close() + + def interpsfex(self, dotpsfpath, pos): + """Interpolate PSFEx. + + Use PSFEx generated model to perform spatial PSF interpolation. 
+
+        Parameters
+        ----------
+        dotpsfpath : str
+            Path to ``.psf`` file (PSFEx output)
+        pos : numpy.ndarray
+            Positions where the PSF model should be evaluated
+
+        Returns
+        -------
+        numpy.ndarray
+            Array of PSFs, each row is the PSF image at the corresponding
+            position requested
+
+        """
+        if not os.path.exists(dotpsfpath):
+            return FILE_NOT_FOUND
+
+        # read PSF model and extract basis and polynomial degree and scale position
+        try:
+            PSF_model = fits.open(dotpsfpath)[1]
+        except OSError as err:
+            msg = f"Psf model file {dotpsfpath} empty or corrupt."
+            self._w_log.info(msg)
+            self._w_log.info(f"Error: {err=}, {type(err)=}")
+            raise
+
+        # Check number of stars used to compute the PSF
+        thresh_star = self._star_thresh
+        thresh_chi2 = self._chi2_thresh
+
+        if PSF_model.header['ACCEPTED'] < thresh_star:
+            return NOT_ENOUGH_STARS
+        if PSF_model.header['CHI2'] > thresh_chi2:
+            return BAD_CHI2
+
+        PSF_basis = np.array(PSF_model.data)[0][0]
+        try:
+            deg = PSF_model.header['POLDEG1']
+        except KeyError:
+            # constant PSF model
+            return PSF_basis[0, :, :]
+
+        # scale coordinates
+        x_interp, x_scale = (
+            PSF_model.header['POLZERO1'],
+            PSF_model.header['POLSCAL1']
+        )
+        y_interp, y_scale = (
+            PSF_model.header['POLZERO2'],
+            PSF_model.header['POLSCAL2']
+
+        )
+        xs, ys = (pos[:, 0] - x_interp) / x_scale, (pos[:, 1] - y_interp) / y_scale
+
+        # compute polynomial coefficients
+        coeffs = np.array([[x ** idx for idx in range(deg + 1)] for x in xs])
+        cross_coeffs = np.array([
+            np.concatenate([
+                [(x ** idx_j) * (y ** idx_i) for idx_j in range(deg - idx_i + 1)]
+                for idx_i in range(1, deg + 1)
+            ])
+            for x, y in zip(xs, ys)
+        ])
+        coeffs = np.hstack((coeffs, cross_coeffs))
+
+        # compute interpolated PSF
+        PSFs = np.array([
+            np.sum(
+                [coeff * atom for coeff, atom in zip(coeffs_posi, PSF_basis)],
+                axis=0,
+            )
+            for coeffs_posi in coeffs
+        ])
+
+        return PSFs
+
 
     def _interpolate(self):
         """Interpolate.
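
The coefficient stacking in `interpsfex` above encodes the PSFEx polynomial basis ordering: pure `x` powers first, then, for each `y` power `i >= 1`, the mixed terms with total degree at most `deg`. As a cross-check, a standalone sketch (not part of the module) that spells out the same ordering:

```python
import numpy as np


def psfex_poly_terms(x, y, deg):
    """PSFEx basis ordering: x**0..x**deg, then y**i * x**j for
    i = 1..deg and j = 0..deg-i (total degree <= deg)."""
    terms = [x**j for j in range(deg + 1)]
    for i in range(1, deg + 1):
        terms += [(x**j) * (y**i) for j in range(deg - i + 1)]
    return np.array(terms)


# deg=2 gives [1, x, x**2, y, x*y, y**2], matching the stacking of
# `coeffs` and `cross_coeffs` above
print(psfex_poly_terms(0.5, -1.0, 2))
```
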
@@ -287,11 +294,9 @@ def _interpolate(self):
         positions.
 
         """
-        self.interp_PSFs = interpsfex(
+        self.interp_PSFs = self.interpsfex(
             self._dotpsf_path,
             self.gal_pos,
-            self._star_thresh,
-            self._chi2_thresh,
         )
 
     def _get_psfshapes(self):
@@ -512,25 +517,28 @@ def _write_output_validation(self, star_dict, psfex_cat_dict):
 
         output.save_as_fits(data, sex_cat_path=self._galcat_path)
 
-    def process_me(self, dot_psf_dir, dot_psf_pattern, f_wcs_path):
+    def process_me(self, dot_psf_dirs, dot_psf_pattern, f_wcs_path):
         """Process Multi-Epoch.
 
-        Process the multi-epoch.
+        Process multi-epoch PSF interpolation.
 
         Parameters
         ----------
-        dot_psf_dir : str
-            Path to the directory containing the ``.psf`` files
+        dot_psf_dirs : list
+            Paths to the directories containing the ``.psf`` files
         dot_psf_pattern : str
             Common pattern of the ``.psf`` files
         f_wcs_path : str
            Path to the log file containing the WCS for each CCDs
 
         """
-        if os.path.exists(dot_psf_dir):
-            self._dot_psf_dir = dot_psf_dir
-        else:
-            raise ValueError(f'Cound not find directory {dot_psf_dir}.')
+        if not any(
+            os.path.exists(dot_psf_dir)
+            for dot_psf_dir in dot_psf_dirs
+        ):
+            raise ValueError('Could not find any dot psf directory.')
+
+        self._dot_psf_dirs = dot_psf_dirs
 
         self._dot_psf_pattern = dot_psf_pattern
 
@@ -568,13 +576,14 @@ def _interpolate_me(self):
         all_id = np.copy(cat.get_data()['NUMBER'])
 
         key_ne = 'N_EPOCH'
-        if key_ne not in cat.get_data():
+        if key_ne not in cat.get_data().dtype.names:
             raise KeyError(
-                f'Key {key_ne} not found in input galaxy catalogue, needed for'
+                f'Key {key_ne} not found in input galaxy catalogue'
+                + f' {self._galcat_path}, needed for'
                 + ' PSF interpolation to multi-epoch data; run previous module'
                 + ' (SExtractor) in multi-epoch mode'
             )
-        n_epoch = np.copy(cat.get_data()[key_me])
+        n_epoch = np.copy(cat.get_data()[key_ne])
 
         list_ext_name = cat.get_ext_name()
         hdu_ind = [
@@ -593,10 +602,22 @@ def _interpolate_me(self):
             for ccd in ccd_list:
                 if ccd == -1:
                     continue
-                dot_psf_path = (
-                    f'{self._dot_psf_dir}/{self._dot_psf_pattern}-{exp_name}'
-                    + f'-{ccd}.psf'
-                )
+                found = False
+                for dot_psf_dir in self._dot_psf_dirs:
+                    dot_psf_path = (
+                        f'{dot_psf_dir}/{self._dot_psf_pattern}-{exp_name}'
+                        + f'-{ccd}.psf'
+                    )
+                    if os.path.exists(dot_psf_path):
+                        found = True
+                        break
+                if not found:
+                    self._w_log.info(
+                        f"No .psf file found for exposure {exp_name} and"
+                        + f" ccd {ccd}"
+                    )
+                    continue
+
                 ind_obj = np.where(cat.get_data(hdu_index)['CCD_N'] == ccd)[0]
                 obj_id = all_id[ind_obj]
                 gal_pos = np.array(
@@ -607,11 +628,9 @@ def _interpolate_me(self):
                     )
                 ).T
 
-                self.interp_PSFs = interpsfex(
+                self.interp_PSFs = self.interpsfex(
                     dot_psf_path,
                     gal_pos,
-                    self._star_thresh,
-                    self._chi2_thresh,
                 )
 
                 if (
@@ -619,7 +638,7 @@ def _interpolate_me(self):
                     and self.interp_PSFs == NOT_ENOUGH_STARS
                 ):
                     self._w_log.info(
-                        f'Not enough stars find in the ccd {ccd} of the '
+                        f'Not enough stars found on ccd {ccd} of '
                         + f'exposure {exp_name}. Object inside this ccd will '
                         + 'lose an epoch.'
                    )
@@ -639,7 +658,7 @@ def _interpolate_me(self):
                    and self.interp_PSFs == FILE_NOT_FOUND
                ):
                    self._w_log.info(
-                        f'Psf model file {self._dotpsf_path} not found. '
+                        f'Psf model file {dot_psf_path} not found. '
                        + 'Object inside this ccd will lose an epoch.'
                    )
                    continue
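
The runner changes below introduce a `last:`/`all:` qualifier in front of the module name in the `ME_DOT_PSF_DIR` config entry (and, further below, in `ME_IMAGE_DIR` of the vignetmaker runner). A small sketch of this convention; the entry value `all:psfex_runner` is an illustrative assumption, and the strict `partition`-based parsing is a simplified variant of the substring checks used in the runners:

```python
def parse_dir_qualifier(entry):
    """Split a config entry of the form 'last:<module>' or 'all:<module>'."""
    qualifier, _, module_name = entry.partition(":")
    if qualifier not in ("last", "all"):
        raise ValueError(
            f"Expected qualifier 'last:' or 'all:' in entry '{entry}'"
        )
    return qualifier, module_name


# 'all' means: search every run of the module, not only the most recent one
print(parse_dir_qualifier("all:psfex_runner"))  # ('all', 'psfex_runner')
```
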
diff --git a/shapepipe/modules/psfex_interp_runner.py b/shapepipe/modules/psfex_interp_runner.py
index f18b543fb..e9c0cca54 100644
--- a/shapepipe/modules/psfex_interp_runner.py
+++ b/shapepipe/modules/psfex_interp_runner.py
@@ -2,14 +2,14 @@
 
 Module runner for ``psfex_interp``.
 
-:Author: Axel Guinot
+:Authors: Axel Guinot, Martin Kilbinger
 
 """
 
 from shapepipe.modules.module_decorator import module_runner
 from shapepipe.modules.psfex_interp_package import psfex_interp
-from shapepipe.pipeline.run_log import get_last_dir
+from shapepipe.pipeline.run_log import get_last_dir, get_all_dirs
 
 
 @module_runner(
@@ -66,7 +66,16 @@ def psfex_interp_runner(
             module_config_sec,
             'ME_DOT_PSF_DIR',
         )
-        dot_psf_dir = get_last_dir(run_dirs['run_log'], module)
+        module_name = module.split(":")[-1]
+        if "last" in module:
+            dot_psf_dirs = [get_last_dir(run_dirs['run_log'], module_name)]
+        elif "all" in module:
+            dot_psf_dirs = get_all_dirs(run_dirs["run_log"], module_name)
+        else:
+            raise ValueError(
+                "Expected qualifier 'last:' or 'all:' before module"
+                + f" '{module}' in config entry 'ME_DOT_PSF_DIR'")
+
         dot_psf_pattern = config.get(
             module_config_sec,
             'ME_DOT_PSF_PATTERN',
@@ -90,7 +99,7 @@ def psfex_interp_runner(
         )
 
         # Process inputs multi-epoch
-        psfex_interp_inst.process_me(dot_psf_dir, dot_psf_pattern, f_wcs_path)
+        psfex_interp_inst.process_me(dot_psf_dirs, dot_psf_pattern, f_wcs_path)
 
     # Run in VALIDATION mode
     elif mode == 'VALIDATION':
diff --git a/shapepipe/modules/setools_package/setools.py b/shapepipe/modules/setools_package/setools.py
index 78feae3d1..3d4d3eb9b 100644
--- a/shapepipe/modules/setools_package/setools.py
+++ b/shapepipe/modules/setools_package/setools.py
@@ -58,7 +58,10 @@ def __init__(
                 SEx_catalogue=True,
             )
             cat_file.open()
-            self._data = cat_file.get_data()
+            try:
+                self._data = cat_file.get_data()
+            except Exception:
+                raise IOError(f"Could not load catalogue data from {cat}")
             cat_file.close()
 
         else:
diff --git a/shapepipe/modules/sextractor_runner.py b/shapepipe/modules/sextractor_runner.py
index 9e3bac7d3..e65374983 100644
--- a/shapepipe/modules/sextractor_runner.py
+++ b/shapepipe/modules/sextractor_runner.py
@@ -2,7 +2,7 @@
 
 Module runner for ``sextractor``.
 
-:Author: Axel Guinot
+:Authors: Axel Guinot, Martin Kilbinger
 
 """
diff --git a/shapepipe/modules/vignetmaker_package/vignetmaker.py b/shapepipe/modules/vignetmaker_package/vignetmaker.py
index 047de8a6b..a7e2b2857 100644
--- a/shapepipe/modules/vignetmaker_package/vignetmaker.py
+++ b/shapepipe/modules/vignetmaker_package/vignetmaker.py
@@ -7,6 +7,7 @@
 """
 
 import re
+import os
 
 import numpy as np
 from astropy.wcs import WCS
@@ -183,22 +184,22 @@ def _get_stamp(self, img_path, pos, rad):
 
         return vign
 
-    def _get_stamp_me(self, image_dir, image_pattern):
+    def _get_stamp_me(self, image_dirs, image_pattern):
         """Get Stamp Multi-Epoch.
 
         Get stamps for multi-epoch data.
Parameters ---------- - image_dir : str - Path to the directory where the images are + image_dirs : list + Path to directories to search for input images image_pattern : str Common part of the file names Returns ------- dict - Directory containing object id and vignets for each epoch + Dictionary containing object id and vignets for each epoch """ cat = file_io.FITSCatalogue(self._galcat_path, SEx_catalogue=True) @@ -226,10 +227,19 @@ def _get_stamp_me(self, image_dir, image_pattern): if ccd == -1: continue - img_path = ( - image_dir + '/' + image_pattern + '-' - + exp_name + '-' + str(ccd) + '.fits' - ) + # Look for input image + found = False + image_name = f"{image_pattern}-{exp_name}-{ccd}.fits" + for image_dir in image_dirs: + img_path = f"{image_dir}/{image_name}" + if os.path.exists(img_path): + found = True + break + if not found: + raise FileNotFoundError( + f"Could not find image {image_name}" + ) + ind_obj = np.where(cat.get_data(hdu_index)['CCD_N'] == ccd)[0] obj_id = all_id[ind_obj] @@ -292,23 +302,23 @@ def _get_stamp_me(self, image_dir, image_pattern): return output_dict - def process_me(self, image_dir, image_pattern, f_wcs_path, rad): + def process_me(self, image_dirs, image_pattern, f_wcs_path, rad): """Process Multi-Epoch. Main function to create the stamps in the multi-epoch case. Parameters ---------- - image_dir : list - List of directories where the image are; ff ``len(image_dir) == 1`` - -> all images are in the same directory, else ``len(image_dir)`` - must match ``len(image_pattern)`` + image_dirs : list + Directories of image locations. + Each list item contains sublist in which images are searched. + Length of outer list has to match image_pattern, or be single item. image_pattern : list Common part of each kind of file names f_wcs_path : str Path to the log file containing the WCS for each CCDs rad : int - Radius of the stamp, must be odd + Radius of the stamp, must be an odd integer """ self._f_wcs_file = SqliteDict(f_wcs_path) @@ -316,17 +326,15 @@ def process_me(self, image_dir, image_pattern, f_wcs_path, rad): for idx in range(len(image_pattern)): - if len(image_dir) != len(image_pattern): - output_dict = self._get_stamp_me( - image_dir[0], - image_pattern[idx], - ) - + if len(image_dirs) != len(image_pattern): + index = 0 else: - output_dict = self._get_stamp_me( - image_dir[idx], - image_pattern[idx], - ) + index = idx + + output_dict = self._get_stamp_me( + image_dirs[index], + image_pattern[idx], + ) self._save_vignet_me(output_dict, image_pattern[idx]) diff --git a/shapepipe/modules/vignetmaker_runner.py b/shapepipe/modules/vignetmaker_runner.py index 972e54e16..9a83f1cd6 100644 --- a/shapepipe/modules/vignetmaker_runner.py +++ b/shapepipe/modules/vignetmaker_runner.py @@ -2,14 +2,14 @@ Module runner for ``vignetmaker``. 
-:Author: Axel Guinot
+:Authors: Axel Guinot, Martin Kilbinger
 
 """
 
 from shapepipe.modules.module_decorator import module_runner
 from shapepipe.modules.vignetmaker_package import vignetmaker as vm
-from shapepipe.pipeline.run_log import get_last_dir
+from shapepipe.pipeline.run_log import get_last_dir, get_all_dirs
 
 
 @module_runner(
@@ -91,10 +91,19 @@ def vignetmaker_runner(
     elif mode == 'MULTI-EPOCH':
         # Fetch image directory and patterns
         modules = config.getlist(module_config_sec, 'ME_IMAGE_DIR')
-        image_dir = []
+        image_dirs = []
         for module in modules:
-            last_dir = get_last_dir(run_dirs['run_log'], module)
-            image_dir.append(last_dir)
+            module_name = module.split(":")[-1]
+            if "last" in module:
+                dirs = [get_last_dir(run_dirs['run_log'], module_name)]
+            elif "all" in module:
+                dirs = get_all_dirs(run_dirs['run_log'], module_name)
+            else:
+                raise ValueError(
+                    "Expected qualifier 'last:' or 'all:' before module"
+                    + f" '{module}' in config entry 'ME_IMAGE_DIR'")
+            image_dirs.append(dirs)
+
         image_pattern = config.getlist(
             module_config_sec,
             'ME_IMAGE_PATTERN',
@@ -103,7 +112,7 @@ def vignetmaker_runner(
         f_wcs_path = config.getexpanded(module_config_sec, 'ME_LOG_WCS')
 
         # Process inputs
-        vm_inst.process_me(image_dir, image_pattern, f_wcs_path, radius)
+        vm_inst.process_me(image_dirs, image_pattern, f_wcs_path, radius)
 
     # Invalid mode
     else:
diff --git a/shapepipe/pipeline/args.py b/shapepipe/pipeline/args.py
index e06e44ef9..818f54163 100644
--- a/shapepipe/pipeline/args.py
+++ b/shapepipe/pipeline/args.py
@@ -128,5 +128,10 @@ def create_arg_parser():
         help='configuration file name',
     )
 
+    optional.add_argument(
+        '-e',
+        '--exclusive',
+        help='exclusive input file number string',
+    )
+
     # Return parser
     return parser.parse_args()
diff --git a/shapepipe/pipeline/file_handler.py b/shapepipe/pipeline/file_handler.py
index b7479f6f3..66baf12e6 100644
--- a/shapepipe/pipeline/file_handler.py
+++ b/shapepipe/pipeline/file_handler.py
@@ -32,12 +32,14 @@ class FileHandler(object):
         List of modules to be run
     config : CustomParser
         Configuaration parser instance
+    exclusive : str, optional
+        Run this file number string exclusively if given, the default is None
     verbose : bool, optional
         Verbose setting, default is True
 
     """
 
-    def __init__(self, run_name, modules, config, verbose=True):
+    def __init__(self, run_name, modules, config, exclusive=None, verbose=True):
 
         self._run_name = run_name
 
@@ -46,6 +48,7 @@ def __init__(self, run_name, modules, config, verbose=True):
             raise ValueError('Invalid module list, check for a trailing comma')
 
         self._config = config
+        self._exclusive = exclusive
         self._verbose = verbose
         self.module_runners = get_module_runners(self._module_list)
 
@@ -1110,6 +1113,18 @@ def _format_process_list(
                 + f'numbering scheme "{num_scheme}".'
             )
 
+            # If "exclusive" option is set: discard all non-matching IDs
+            if self._exclusive is not None:
+                id_to_test = f"-{self._exclusive.replace('.', '-')}"
+                if number == id_to_test:
+                    if self._verbose:
+                        print(f"-- Using exclusive number {self._exclusive} ({id_to_test})")
+                else:
+                    if self._verbose:
+                        #print(f"Skipping {number}, not equal to {self._exclusive} ({id_to_test})")
+                        pass
+                    continue
+
             if run_method == 'serial':
                 process_items = []
             else:
@@ -1120,6 +1135,9 @@ def _format_process_list(
                 ])
             process_list.append(process_items)
 
+        if len(process_list) == 0:
+            raise ValueError("Empty process list")
+
         return process_list
 
     def _save_process_list(
diff --git a/shapepipe/pipeline/job_handler.py b/shapepipe/pipeline/job_handler.py
index d6baebd89..bc80f35da 100644
--- a/shapepipe/pipeline/job_handler.py
+++ b/shapepipe/pipeline/job_handler.py
@@ -42,6 +42,8 @@ class JobHandler(object):
         Joblib backend, the default is None (which corresponds to 'loky')
     timeout : int, optional
         Timeout limit for a given job in seconds, the default is None
+    exclusive : str, optional
+        Run this file number string exclusively if given, the default is None
     verbose : bool, optional
         Verbose setting, default is True
 
@@ -58,6 +60,7 @@ def __init__(
         batch_size=None,
         backend=None,
         timeout=None,
+        exclusive=None,
         verbose=True,
     ):
 
@@ -72,6 +75,7 @@ def __init__(
         self._module = module
         self._module_runner = self.filehd.module_runners[self._module]
         self.error_count = 0
+        self.exclusive = exclusive
         self._verbose = verbose
 
         # Add the job parameters to the log
diff --git a/shapepipe/pipeline/run_log.py b/shapepipe/pipeline/run_log.py
index 115e87de0..c182eeac7 100644
--- a/shapepipe/pipeline/run_log.py
+++ b/shapepipe/pipeline/run_log.py
@@ -170,6 +170,35 @@ def get_last(runs, module):
     return last_run.split(' ')[0]
 
 
+def get_all_dirs(run_log_file, module):
+    """Get All Dirs.
+
+    Return directory paths corresponding to all runs of given module.
+
+    Parameters
+    ----------
+    run_log_file : str
+        Run log file name
+    module : str
+        Module name
+
+    Returns
+    -------
+    list
+        Directory names of all module runs
+
+    """
+    runs = get_list(run_log_file)
+    all_runs = get_all(runs, module)
+
+    all_dirs = []
+    for run in all_runs:
+        dir_name = run.split(" ")[0]
+        all_dirs.append(f"{dir_name}/{module}/output")
+
+    return all_dirs
+
+
 def get_last_dir(run_log_file, module):
     """Get Last Dir.
@@ -188,9 +217,10 @@ def get_last_dir(run_log_file, module):
         Directory name of last module run
 
     """
-    runs = get_list(run_log_file)
-    all_runs = get_all(runs, module)
-    last_run = all_runs[0].split(' ')[0]
-    last_dir = f'{last_run}/{module}/output'
+    all_dirs = get_all_dirs(run_log_file, module)
+    last_dir = all_dirs[0]
 
     return last_dir
+
+
+
diff --git a/shapepipe/run.py b/shapepipe/run.py
index 2443eab50..80bebbda5 100644
--- a/shapepipe/run.py
+++ b/shapepipe/run.py
@@ -51,12 +51,14 @@ def set_up(self):
         self._set_run_name()
         self.modules = self.config.getlist('EXECUTION', 'MODULE')
         self.mode = self.config.get('EXECUTION', 'MODE').lower()
+        self.exclusive = self._args.exclusive
         self.verbose = self.config.getboolean('DEFAULT', 'VERBOSE')
         self.filehd = FileHandler(
             self._run_name,
             self.modules,
             self.config,
-            self.verbose,
+            exclusive=self._args.exclusive,
+            verbose=self.verbose,
         )
         self.error_count = 0
         self._prep_run()
@@ -330,6 +332,7 @@ def run_smp(pipe):
             config=pipe.config,
             log=pipe.log,
             job_type=pipe.run_method[module],
+            exclusive=pipe.exclusive,
             verbose=pipe.verbose,
         )
 
@@ -388,6 +391,7 @@ def run_mpi(pipe, comm):
             log=pipe.log,
             job_type=pipe.run_method[module],
             parallel_mode='mpi',
+            exclusive=pipe.exclusive,
             verbose=verbose,
         )
diff --git a/shapepipe/utilities/summary.py b/shapepipe/utilities/summary.py
new file mode 100755
index 000000000..8a1776905
--- /dev/null
+++ b/shapepipe/utilities/summary.py
@@ -0,0 +1,697 @@
+"""SUMMARY
+
+Author: Martin Kilbinger
+
+"""
+
+import sys
+import os
+import re
+import fnmatch
+
+import logging
+
+from collections import Counter
+
+from tqdm import tqdm
+
+print("summary v1.1")
+
+
+def get_IDs_from_file(path):
+    """Get IDs From File.
+
+    Return IDs from text file. Letters are removed
+    from each entry; dots are kept as they are.
+
+    Parameters
+    ----------
+    path: str
+        input file path
+
+    Returns
+    -------
+    list
+        IDs
+
+    """
+    numbers = []
+    with open(path) as f_in:
+        for line in f_in:
+            entry = line.rstrip()
+            number = re.sub("[a-zA-Z]", "", entry)
+            numbers.append(number)
+
+    return numbers
+
+
+
+def get_all_exposures(exp_number_file_list, verbose=False):
+    """Get All Exposures.
+
+    Return all exposure names from a list of text files.
+
+    Parameters
+    ----------
+    exp_number_file_list: list
+        input file names
+
+    """
+    exposures = set()
+    for idx, path in enumerate(exp_number_file_list):
+        exps = get_IDs_from_file(path)
+        exposures.update(exps)
+
+    return list(exposures)
+
+
+def get_all_shdus(exposures, n_CCD):
+    """Get All SHDUs.
+
+    Return all single-exposure single-HDU (CCD) IDs.
+
+    Parameters
+    ----------
+    exposures: list
+        exposure names
+    n_CCD: int
+        number of CCDs per exposure
+
+    Returns
+    -------
+    list
+        single-exposure single-HDU IDs
+
+    """
+    shdus = []
+    for exposure in exposures:
+        for idx_CCD in range(n_CCD):
+            shdus.append(f"{exposure}-{idx_CCD}")
+
+    return shdus
+
+
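
A quick usage sketch of the ID helpers above, assuming this PR's `shapepipe.utilities.summary` is importable; the exposure names and CCD count are made-up example values:

```python
from shapepipe.utilities.summary import get_all_shdus

# Expand two made-up exposure names into per-CCD (SHDU) IDs
print(get_all_shdus(["2366993", "2367000"], 2))
# ['2366993-0', '2366993-1', '2367000-0', '2367000-1']
```
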
+def set_as_list(item=None, n=None, default=1):
+    """Set As List.
+
+    Return input as list.
+
+    Parameters
+    ----------
+    item: str, int, or list, optional
+        input item(s); default is None, in which
+        case the return is [1] * n
+    n: int, optional
+        number of list items to return, default is None,
+        in which case the number will be set to 1. If item and
+        n are not None, n has to be equal to len(item)
+    default: int, optional
+        value to return if item is not given;
+        default is 1
+
+    Raises
+    ------
+    IndexError
+        if n != len(item)
+
+    Returns
+    -------
+    list
+        input item(s) as list
+    """
+    my_n = n or 1
+
+    if not item:
+        result = [default] * my_n
+    elif not isinstance(item, list):
+        result = [item] * my_n
+    else:
+        result = item
+        if len(item) != my_n:
+            raise IndexError(f"item has length {len(item)} != {n}")
+
+    return result
+
+
+def check_special(module, paths_in_dir, names_in_dir):
+
+    inds_special = []
+    if module == "setools_runner":
+        for idx in range(len(paths_in_dir)):
+            base_path = paths_in_dir[idx].replace(names_in_dir[idx], "")
+
+            stats_dir = f"{base_path}/../stat"
+            stats_files = os.listdir(stats_dir)
+            if len(stats_files) != 1:
+                raise ValueError(
+                    f"Expected exactly one stats file in {stats_dir}, not"
+                    + f" {len(stats_files)}"
+                )
+
+            stats_path = os.path.join(stats_dir, stats_files[0])
+            with open(stats_path) as f_in:
+                lines = f_in.readlines()
+                for line in lines:
+                    entry = line.rstrip()
+                    m = re.search(r"Nb stars = (\S*)", entry)
+                    if m:
+                        value = int(m[1])
+                        if value == 0:
+                            inds_special.append(idx)
+                        else:
+                            print(f"Nb stars = {value}, not special")
+                        break
+
+        # Pop from the end so that earlier indices remain valid
+        for idx in sorted(inds_special, reverse=True):
+            paths_in_dir.pop(idx)
+            names_in_dir.pop(idx)
+
+    return paths_in_dir, names_in_dir, len(inds_special)
+
+
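
`set_as_list` normalises the scalar-or-list parameters of the `job_data` class that follows; a short sketch of its contract:

```python
from shapepipe.utilities.summary import set_as_list

print(set_as_list(n=3))        # [1, 1, 1]: default value broadcast to n
print(set_as_list("a", 3))     # ['a', 'a', 'a']: scalar broadcast to n
print(set_as_list([4, 5], 2))  # [4, 5]: list passed through unchanged
# set_as_list([4, 5], 3) raises IndexError: item has length 2 != 3
```
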
+class job_data(object):
+    """Job Data.
+
+    Class to handle a job.
+
+    Parameters
+    ----------
+    bit: int
+        bit-coded job number
+    run_dir: str or list
+        run directory(ies)
+    modules: list
+        module names
+    key_expected: int or str
+        number of expected output files; if str: will be updated
+        with runtime value
+    n_mult: int or list, optional
+        multiplicity of output files, default `None`, in which
+        case it is set to 1
+    pattern: list, optional
+        if not None, file pattern to match; default is `None`
+    path_main: str, optional
+        main (left-most) part of output directory, default is "."
+    path_left: str, optional
+        left (first) part of output directory, default is "output"
+    output_subdirs: str, optional
+        output subdirectories if not `None`; default is `None`
+    path_right: str, optional
+        right (last) part of output subdir suffix if not `None`;
+        default is `None`
+    output_path_missing_IDs: list, optional
+        output path of missing IDs, if `None` (default) will be
+        given by job bit and module.
+    verbose: bool, optional
+        verbose output if True; default is False
+
+    """
+    def __init__(
+        self,
+        bit,
+        run_dir,
+        modules,
+        key_expected,
+        n_mult=None,
+        pattern=None,
+        path_main=".",
+        path_left="output",
+        output_subdirs=None,
+        path_right=None,
+        output_path_missing_IDs=None,
+        verbose=False,
+    ):
+        self._bit = bit
+        self._run_dir = set_as_list(item=run_dir, n=len(modules))
+        self._modules = modules
+        self._key_expected = set_as_list(item=key_expected, n=len(modules))
+        self._n_mult = set_as_list(item=n_mult, n=len(modules))
+        self._pattern = set_as_list(item=pattern, n=len(modules), default="")
+        self._path_main = path_main
+        self._path_left = path_left
+        self._output_subdirs = output_subdirs or [""]
+        self._path_right = set_as_list(
+            path_right, len(modules), default="."
+        )
+        self._output_path_missing_IDs = output_path_missing_IDs
+        self._verbose = verbose
+
+    def print_intro(self):
+        """Print Intro.
+
+        Print header line for job statistics.
+
+        """
+        logging.info(f" # Job {self._bit}:")
+
+    @classmethod
+    def print_stats_header(self):
+        """Print Stats Header.
+
+        Print overall header information for stats output.
+
+        """
+        logging.info(
+            "module                          expected     found miss_expl"
+            + "   missing uniq_miss  fr_found"
+        )
+        logging.info("=" * 100)
+
+    @classmethod
+    def print_stats(
+        self,
+        module,
+        n_expected,
+        n_found,
+        n_missing_explained,
+        n_missing,
+        n_mult,
+    ):
+        """Print Stats.
+
+        Print output file statistics.
+
+        Parameters
+        ----------
+        module: str
+            module name
+        n_expected: int
+            number of expected files
+        n_found: int
+            number of found files
+        n_missing_explained: int
+            number of missing but explained files
+        n_missing: int
+            number of missing files
+        n_mult: int
+            multiplicity
+
+        """
+        if n_expected > 0:
+            fraction_found = n_found / n_expected
+        else:
+            fraction_found = 1
+
+        n_missing_per_mult = n_missing / n_mult
+
+        logging.info(
+            f"{module:30s} {n_expected:9d} {n_found:9d}"
+            + f" {n_missing_explained:9d} {n_missing:9d}"
+            + f" {n_missing_per_mult:9.1f} {fraction_found:9.1%}"
+        )
+
+    @classmethod
+    def is_ID_in_str(self, ID, path):
+        if ID in path:
+            return True
+        #if re.sub(r"\.", "-", ID) in path:
+            #return True
+        return False
+
+    @classmethod
+    def is_not_in_any(self, ID, list_str):
+        return not any(ID in string for string in list_str)
+
+    @classmethod
+    def replace_dot_dash(self, numbers):
+
+        results = [re.sub(r"\.", "-", number) for number in numbers]
+
+        return results
+
+    @classmethod
+    def replace_dash_dot_if_tile(self, numbers):
+
+        pattern = re.compile(r"(\d{3})-(\d{3})")
+        results = [pattern.sub(r"\1.\2", number) for number in numbers]
+
+        return results
+
+    @classmethod
+    def get_unique(self, names):
+        n_all = len(names)
+        names_unique = list(set(names))
+        n_unique = len(names_unique)
+
+        if n_all != n_unique:
+            if True:  # self._verbose:
+                logging.warning(
+                    f"{n_all - n_unique} duplicates removed from {n_all} IDs"
+                )
+
+        return names_unique
+
+    @classmethod
+    def write_IDs_to_file(self, output_path, IDs):
+        """Write IDs to file.
+
+        Write list of image IDs to text file.
+
+        Parameters
+        ----------
+        output_path: str
+            output file path
+        IDs: list
+            image IDs
+
+        """
+        IDs_dot = self.replace_dash_dot_if_tile(IDs)
+        with open(output_path, "w") as f_out:
+            for ID in IDs_dot:
+                print(ID, file=f_out)
+
+    def output_missing(
+        self,
+        module,
+        idx,
+        par_runtime=None,
+    ):
+        """Output Missing.
+
+        Writes IDs of missing images to disk.
+
+        """
+        key_expected = self._key_expected[idx]
+        names_in_dir = self._names_in_dir[idx]
+        paths_in_dir = self._paths_in_dir[idx]
+        n_mult = self._n_mult[idx]
+
+        list_expected = get_par_runtime(par_runtime, key_expected, kind="list")
+
+        # Count image IDs in names that were found earlier
+
+        ## Extract image IDs from names
+        IDs = []
+        pattern = re.compile(
+            r"(?:\d{3}-\d{3}|\d{7}-\d+|\d{7})"
+        )
+        for name, path in zip(names_in_dir, paths_in_dir):
+            match = pattern.search(name)
+            if match:
+                IDs.append(match.group())
+            else:
+                raise ValueError(f"No ID found in {name}")
+
+
+        ## Count occurrences
+        ID_counts = Counter(IDs)
+
+        ## Add to missing if occurrence is less than n_mult
+        missing_IDs = []
+        for ID in list_expected:
+            if ID_counts[ID] < n_mult:
+                missing_IDs.append(ID)
+
+        n_all = len(missing_IDs)
+        missing_IDs_unique = self.get_unique(missing_IDs)
+        n_unique = len(missing_IDs_unique)
+
+        if n_unique > 0:
+            if not self._output_path_missing_IDs:
+                output_path = (
+                    f"{self._path_main}/summary/missing_job_{self._bit}"
+                    + f"_{module}.txt"
+                )
+            else:
+                output_path = self._output_path_missing_IDs[idx]
+            #print("MKDEBUG", missing_IDs_unique)
+            self.write_IDs_to_file(output_path, missing_IDs_unique)
+
+        return missing_IDs_unique
+
+    def output_missing_job(self):
+        output_path = (
+            f"{self._path_main}/summary/missing_job_{self._bit}_all.txt"
+        )
+
+        missing_IDs_all = set(self._missing_IDs_job)
+
+        if len(missing_IDs_all) > 0:
+            self.write_IDs_to_file(output_path, missing_IDs_all)
+        else:
+            #logging.warning("no missing IDs in output_missing_job")
+            if os.path.exists(output_path):
+                os.unlink(output_path)
+
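
The regular expression in `output_missing` above matches the three ID formats produced by the pipeline: tile IDs (`ddd-ddd`), single-exposure single-HDU IDs (`ddddddd-n`), and exposure IDs (`ddddddd`). A standalone sketch with made-up file names:

```python
import re

pattern = re.compile(r"(?:\d{3}-\d{3}|\d{7}-\d+|\d{7})")

for name in (
    "final_cat-283-247.fits",  # tile ID
    "psf-2366993-12.fits",     # exposure-CCD (SHDU) ID
    "image-2366993.fits",      # exposure ID
):
    print(pattern.search(name).group())
# 283-247, 2366993-12, 2366993
```
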
+    @classmethod
+    def get_last_full_path(self, base_and_subdir, matches):
+        """Get Last Full Path
+
+        Return full path of last file in list.
+
+        """
+        # Sort by name; run directory names encode their creation time
+        matches_sorted = sorted(
+            matches,
+            key=lambda entry: entry.name,
+        )
+
+        # Get most recent one
+        last = matches_sorted[-1]
+
+        # Get full path
+        full_path = os.path.join(base_and_subdir, last.name)
+
+        return full_path
+
+    @classmethod
+    def get_module_output_dir(self, full_path, module):
+        """Get Module Output Dir.
+
+        Return output directory name for given module.
+
+        """
+        directory = f"{full_path}/{module}/output"
+
+        # Some modules have special requirements
+        if module == "setools_runner":
+            directory = f"{directory}/rand_split"
+
+        return directory
+
+    def get_matches_final(self, directory, idx):
+
+        # Loop over files
+        # os.path.exists is about twice as fast as try/except here
+
+        if os.path.exists(directory):
+            pattern = f"{self._pattern[idx]}*"
+            for entry2 in os.scandir(directory):
+                if (
+                    entry2.is_file()
+                    and (
+                        fnmatch.fnmatch(entry2.name, pattern)
+                    )
+                    and entry2.stat().st_size > 0
+                ):
+                    # Append matching files
+                    self._names_in_dir[idx].append(entry2.name)
+                    self._paths_in_dir[idx].append(
+                        os.path.join(directory, entry2.name)
+                    )
+
+    def get_names_in_dir(self, iterable, module, idx):
+
+        # Initialise output file names and paths
+        self._names_in_dir[idx] = []
+        self._paths_in_dir[idx] = []
+
+        # Loop over subdirs
+        for jdx, subdir in enumerate(iterable):
+            base_and_subdir = (
+                f"{self._path_main}/"
+                + f"{self._path_left}/{subdir}/"
+                + f"{self._path_right[idx]}"
+            )
+            if self._verbose:
+                print(f"**** base_and_subdir {base_and_subdir}")
+
+            if os.path.isdir(base_and_subdir):
+
+                matches = []
+
+                # Loop over entries (files and dirs)
+                with os.scandir(base_and_subdir) as entries:
+                    for entry in entries:
+
+                        # Append directory name if matches module
+                        if entry.name.startswith(self._run_dir[idx]):
+                            matches.append(entry)
+
+                # This entry does not match module -> next
+                if not matches:
+                    continue
+
+                if self._verbose:
+                    print("**** Matching entries: ", end="")
+                    for match in matches:
+                        print(match.name)
+
+                full_path = self.get_last_full_path(
+                    base_and_subdir, matches
+                )
+
+                # Get module output directory
+                directory = self.get_module_output_dir(
+                    full_path, module
+                )
+                if self._verbose:
+                    print(f"**** Output dir = {directory}")
+
+                # Find matching file names and paths
+                self.get_matches_final(directory, idx)
+            else:
+                if self._verbose:
+                    print(f"Directory {base_and_subdir} not found")
+
+    def update_subdirs(self, par_runtime):
+        """Update Subdirs.
+
+        Update subdir names with runtime information if required.
+
+        """
+        if not isinstance(self._output_subdirs, list):
+            self._output_subdirs = get_par_runtime(
+                par_runtime, self._output_subdirs, kind="list"
+            )
+
+    def check_numbers(self, par_runtime=None, indices=None):
+        """Check Numbers.
+
+        Check output file numbers and IDs.
+
+        Parameters
+        ----------
+        par_runtime : dict, optional
+            runtime parameter, default is None
+        indices: list, optional
+            if not None (default), only check modules corresponding
+            to indices
+
+        """
+        # Update subdirs if not already set as list
+        self.update_subdirs(par_runtime)
+
+        # Initialise variables
+        self._names_in_dir = {}
+        self._paths_in_dir = {}
+        self._missing_IDs_job = []
+        n_missing_job = 0
+
+        # Loop over modules
+        for idx, module in enumerate(self._modules):
+            if indices is not None and idx not in indices:
+                continue
+
+            if self._verbose:
+                print(f"** module {module}")
+
+            # Loop over subdirs
+            iterable = self._output_subdirs
+            if len(iterable) > 1 and self._verbose:
+                iterable = tqdm(iterable, desc="subdirs", leave=False)
+
+            if self._verbose:
+                print(f"*** subdirs {self._output_subdirs}")
+
+            # Get output file names and paths
+            self.get_names_in_dir(
+                iterable,
+                module,
+                idx,
+            )
+
+            # If expected is string: Update parameter with runtime value
+            # and set as integer
+            if isinstance(self._key_expected[idx], str):
+                n_expected_base = get_par_runtime(
+                    par_runtime, self._key_expected[idx], kind="n"
+                )
+            else:
+                n_expected_base = self._key_expected[idx]
+
+            # Get some numbers
+            n_found = len(self._names_in_dir[idx])
+            n_expected = n_expected_base * self._n_mult[idx]
+            n_missing = n_expected - n_found
+
+            n_missing_explained = 0
+            if n_missing > 0:
+                # TODO: make check_special class function, deal with
+                # paths, names in dir
+                if False and module == "setools_runner":
+                    (
+                        self._paths_in_dir[idx],
+                        self._names_in_dir[idx],
+                        n_missing_explained,
+                    ) = check_special(module, paths_in_dir, names_in_dir)
+
+                n_missing = n_missing - n_missing_explained
+
+            # Print statistics
+            self.print_stats(
+                module,
+                n_expected,
+                n_found,
+                n_missing_explained,
+                n_missing,
+                self._n_mult[idx],
+            )
+
+            # Write missing IDs for module to file
+            if n_missing > 0:
+                missing_IDs = self.output_missing(
+                    module,
+                    idx,
+                    par_runtime=par_runtime,
+                )
+                n_missing_job += n_missing
+                self._missing_IDs_job.extend(missing_IDs)
+
+        # Empty line after job
+        logging.info("")
+
+        # Write missing IDs for entire job to file
+        #if n_missing_job > 0:
+        self.output_missing_job()
+
+
+def get_par_runtime(par_runtime, key, kind="n"):
+    """Get Par RunTime.
+
+    Return runtime parameter value.
+
+    Parameters
+    ----------
+    par_runtime: dict
+        runtime parameter
+    key: str
+        key
+
+    """
+    combined_key = f"{kind}_{key}"
+    if (
+        combined_key == "list_3*n_shdus+n_exposures"
+        and combined_key not in par_runtime
+    ):
+        print(f"{combined_key} not set, TBD")
+        return []
+
+    return par_runtime[combined_key]
+
+
+def print_par_runtime(par_runtime, verbose=True):
+    # Print runtime parameter values
+    if verbose:
+        logging.info("")
+        logging.info("===========")
+        logging.info("par_runtime")
+        logging.info("-----------")
+        for key, value in par_runtime.items():
+            if not key.startswith("list"):
+                logging.info(f"{key:30s} {value:6d}")
+            else:
+                logging.info(f"{key:29s} {len(value):6d} entries")
+        logging.info("===========")
+        logging.info("")
diff --git a/xterm-start.sh b/xterm-start.sh
new file mode 100755
index 000000000..007a2516f
--- /dev/null
+++ b/xterm-start.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+xterm
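
Finally, a sketch of the `par_runtime` convention consumed by `get_par_runtime` above: keys combine a kind prefix (`n` for counts, `list` for ID lists) with a quantity name. The dictionary contents below are illustrative only, assuming this PR's `shapepipe.utilities.summary` is importable:

```python
from shapepipe.utilities.summary import get_par_runtime

par_runtime = {
    "n_tile_IDs": 2,
    "list_tile_IDs": ["283.247", "214.242"],
}

print(get_par_runtime(par_runtime, "tile_IDs", kind="n"))     # 2
print(get_par_runtime(par_runtime, "tile_IDs", kind="list"))  # ['283.247', '214.242']
```
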