# Review notes. These lines sit ahead of the first "diff --git" header; the
# patch text below them is unchanged.
#
# Purpose: unified diff touching two scripts, partition-before-pggb and pggb.
#
# partition-before-pggb
#   * Adds parse_numeric(). It takes one argument and splits it into a value
#     and a trailing part that starts at the last K/k/M/m/G/g/T/t letter:
#       - empty trailing part: the value is echoed if it matches
#         ^[0-9]+(\.[0-9]+)?$, otherwise "[pggb] ERROR: Invalid input" goes to
#         stderr and the function returns 1;
#       - trailing part is exactly one of K/k, M/m, G/g, T/t: the value is
#         multiplied by 1000, 1000000, 1000000000 or 1000000000000 through bc
#         and printed with printf "%.0f";
#       - any other trailing part (for example "kb"): an error goes to stderr
#         and the function returns 1.
#   * -s/--segment-length, -l/--block-length and -B/--transclose-batch now
#     assign the output of $(parse_numeric $2) instead of $2 itself.
#   * The long option do-stats becomes stats, both in the getopt --long list
#     and in the -S case arm.
#   * The version string is built from `git describe --always --tags --long`.
#   * The hunk at -356,7 +379,6 appears to drop one blank line before the
#     prefix_mappings_paf assignment.
#
# pggb
#   * The `echo -e "\nRunning pggb\n" >> "$log_file"` line is removed from
#     directly after the EOT marker and re-added after the loop that reads
#     "${input_fasta}.fai".
#
# NOTE(review): the regex check in parse_numeric only runs when no suffix is
#   present; with a suffix the numeric part is handed to bc unchecked.
# NOTE(review): an unsuffixed fractional value such as 1.5 is echoed unchanged,
#   while suffixed results are rounded by "%.0f" - confirm this is intended for
#   the options that use it.
# NOTE(review): $2 and $value are expanded unquoted at the call sites and
#   inside the function.
# NOTE(review): the callers show no check of parse_numeric's return status;
#   whether a failure stops the script depends on shell options not visible in
#   this diff - TODO confirm.
# NOTE(review): the getopt --long list names block-length-min while the case
#   arm matches --block-length (both unchanged by this patch) - verify the long
#   form is actually reachable.
# NOTE(review): line breaks and indentation look collapsed in this copy of the
#   patch; it presumably cannot be applied as-is - confirm against the original.
diff --git a/partition-before-pggb b/partition-before-pggb index 8ff76af..d74476f 100755 --- a/partition-before-pggb +++ b/partition-before-pggb @@ -87,6 +87,29 @@ no_merge_segments=false block_ratio_min=0 reduce_redundancy=true +# function that parses metric suffix numerical arguments +parse_numeric() { + local input=$1 + local value=${input%[KkMmGgTt]*} + local suffix=${input#$value} + if [[ -z $suffix ]]; then + if [[ $value =~ ^[0-9]+(\.[0-9]+)?$ ]]; then + echo $value + return 0 + else + echo "[pggb] ERROR: Invalid input" >&2 + return 1 + fi + fi + case $suffix in + K|k) value=$(echo "$value * 1000" | bc) ;; + M|m) value=$(echo "$value * 1000000" | bc) ;; + G|g) value=$(echo "$value * 1000000000" | bc) ;; + T|t) value=$(echo "$value * 1000000000000" | bc) ;; + *) echo "[pggb] ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;; + esac + printf "%.0f" $value +} if [ $# -eq 0 ]; then show_help=true @@ -94,15 +117,15 @@ fi # read the options cmd=$0" "$@ -TEMP=`getopt -o i:o:D:a:p:n:s:l:K:F:k:x:f:B:XH:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haps:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,do-stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"` +TEMP=`getopt -o i:o:D:a:p:n:s:l:K:F:k:x:f:B:XH:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long 
input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haps:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"` eval set -- "$TEMP" # extract options and their arguments into variables. while true ; do case "$1" in -i|--input-fasta) input_fasta=$2 ; shift 2 ;; - -s|--segment-length) segment_length=$2 ; shift 2 ;; - -l|--block-length) block_length=$2 ; shift 2 ;; + -s|--segment-length) segment_length=$(parse_numeric $2) ; shift 2 ;; + -l|--block-length) block_length=$(parse_numeric $2) ; shift 2 ;; -p|--map-pct-id) map_pct_id=$2 ; shift 2 ;; -n|--n-haplotypes) n_mappings=$2 ; shift 2 ;; -N|--no-splits) no_splits=true ; shift ;; @@ -112,7 +135,7 @@ while true ; do -Y|--exclude-delim) exclude_delim=$2 ; shift 2 ;; -k|--min-match-length) min_match_length=$2 ; shift 2 ;; -f|--sparse-factor) sparse_factor=$2 ; shift 2 ;; - -B|--transclose-batch) transclose_batch=$2 ; shift 2 ;; + -B|--transclose-batch) transclose_batch=$(parse_numeric $2) ; shift 2 ;; -X|--skip-normalization) skip_normalization=true ; shift ;; -H|--n-haplotypes-smooth) n_haps=$2 ; shift 2 ;; -j|--path-jump-max) max_path_jump=$2 ; shift 2 ;; @@ -127,7 +150,7 @@ while true ; do #-C|--consensus-spec) consensus_spec=$2 ; shift 2 ;; -Q|--consensus-prefix) consensus_prefix=$2 ; shift 2 ;; -v|--skip-viz) do_viz=false ; do_layout=false; shift ;; - -S|--do-stats) do_stats=true ; shift ;; + -S|--stats) do_stats=true ; shift ;; -V|--vcf-spec) vcf_spec=$2 ; shift 2 ;; -m|--multiqc) multiqc=true ; shift ;; -o|--output-dir) output_dir=$2 ; shift 2 
;; @@ -148,7 +171,7 @@ done if [ $show_version == true ]; then SCRIPT_DIR=$( cd -- "$(dirname -- "$(readlink -f "${BASH_SOURCE[0]}" )" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" - GIT_VERSION=$(git describe --always --tags) + GIT_VERSION=$(git describe --always --tags --long) echo "pggb $GIT_VERSION" cd - &> /dev/null exit @@ -356,7 +379,6 @@ if [ ! -e "$temp_dir" ]; then mkdir "$temp_dir" temp_dir_was_created=true fi - prefix_mappings_paf="$temp_dir"/$(basename "$prefix_paf") prefix_seqwish="$temp_dir"/$(basename "$prefix_seqwish") prefix_smoothed="$temp_dir"/$(basename "$prefix_smoothed") diff --git a/pggb b/pggb index 65d1f77..670148c 100755 --- a/pggb +++ b/pggb @@ -454,9 +454,6 @@ reporting: multiqc: $multiqc EOT - -echo -e "\nRunning pggb\n" >> "$log_file" - # Check Pangenome Sequence Naming (PanSN) if [ ! -f "${input_fasta}.fai" ]; then echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'." @@ -470,6 +467,8 @@ cut -f 1 "${input_fasta}.fai" | while read -r line; do fi done +echo -e "\nRunning pggb\n" >> "$log_file" + if [[ "$input_paf" == false ]]; then if [[ ! -s "$prefix_paf".alignments.$mapper.paf || $resume == false ]]; then