Skip to content

Commit

Permalink
Merge pull request #332 from pangenome/sync_partition_before_pggb
Browse files Browse the repository at this point in the history
update `partition-before-pggb`
  • Loading branch information
AndreaGuarracino authored Sep 28, 2023
2 parents 564586e + 1f53de2 commit 3c17da0
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 10 deletions.
36 changes: 29 additions & 7 deletions partition-before-pggb
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,45 @@ no_merge_segments=false
block_ratio_min=0
reduce_redundancy=true

# Parse a non-negative number that may carry a metric suffix.
#
# Arguments: $1 - a number such as 5000, 1.5, 10k or 2.5M. The suffixes
#                 k/K, m/M, g/G and t/T multiply the number by
#                 10^3, 10^6, 10^9 and 10^12 respectively.
# Outputs:   the resulting value on stdout. Without a suffix the number is
#            echoed unchanged; with a suffix the product is rounded to an
#            integer. Error messages go to stderr.
# Returns:   0 on success, 1 on invalid input or when bc fails.
parse_numeric() {
  local input=${1-}
  # Remove the shortest trailing piece that starts with a suffix letter;
  # what is left is the candidate numeric part.
  local value=${input%[KkMmGgTt]*}
  # Quote the pattern so that $value is matched literally, not as a glob.
  local suffix=${input#"$value"}
  local number_re='^[0-9]+(\.[0-9]+)?$'
  local multiplier

  # Validate the numeric part whether or not a suffix is present. Without
  # this check, inputs like "abck" or "k" reach bc, which evaluates unknown
  # names as 0 (or fails), and the function would silently print 0.
  if [[ ! $value =~ $number_re ]]; then
    echo "[pggb] ERROR: Invalid input" >&2
    return 1
  fi

  if [[ -z $suffix ]]; then
    echo "$value"
    return 0
  fi

  case $suffix in
    K|k) multiplier=1000 ;;
    M|m) multiplier=1000000 ;;
    G|g) multiplier=1000000000 ;;
    T|t) multiplier=1000000000000 ;;
    *) echo "[pggb] ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;;
  esac

  # bc is used because the numeric part may be fractional (e.g. 1.5M).
  value=$(echo "$value * $multiplier" | bc) || return 1
  if [[ -z $value ]]; then
    echo "[pggb] ERROR: Invalid input" >&2
    return 1
  fi

  # Force the C locale so the "." emitted by bc is accepted as the decimal
  # separator regardless of the user's LC_NUMERIC setting.
  LC_ALL=C printf '%.0f\n' "$value"
}

# When the script is invoked without any arguments, raise the show_help flag.
if (( $# == 0 )); then
  show_help=true
fi

# read the options
# Keep the full invocation (script name plus all arguments) as one string.
cmd=$0" "$@
# Let getopt validate and normalize the short and long options; errors are
# reported under the program name 'pggb'.
# NOTE(review): TEMP is assigned twice below. The two option lists differ in
# the long option name `do-stats` (first) vs `stats` (second); the second
# assignment overwrites the first.
TEMP=`getopt -o i:o:D:a:p:n:s:l:K:F:k:x:f:B:XH:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haps:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,do-stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
TEMP=`getopt -o i:o:D:a:p:n:s:l:K:F:k:x:f:B:XH:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haps:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
# Replace the positional parameters with getopt's normalized output.
eval set -- "$TEMP"

# extract options and their arguments into variables.
while true ; do
case "$1" in
-i|--input-fasta) input_fasta=$2 ; shift 2 ;;
-s|--segment-length) segment_length=$2 ; shift 2 ;;
-l|--block-length) block_length=$2 ; shift 2 ;;
-s|--segment-length) segment_length=$(parse_numeric $2) ; shift 2 ;;
-l|--block-length) block_length=$(parse_numeric $2) ; shift 2 ;;
-p|--map-pct-id) map_pct_id=$2 ; shift 2 ;;
-n|--n-haplotypes) n_mappings=$2 ; shift 2 ;;
-N|--no-splits) no_splits=true ; shift ;;
Expand All @@ -112,7 +135,7 @@ while true ; do
-Y|--exclude-delim) exclude_delim=$2 ; shift 2 ;;
-k|--min-match-length) min_match_length=$2 ; shift 2 ;;
-f|--sparse-factor) sparse_factor=$2 ; shift 2 ;;
-B|--transclose-batch) transclose_batch=$2 ; shift 2 ;;
-B|--transclose-batch) transclose_batch=$(parse_numeric $2) ; shift 2 ;;
-X|--skip-normalization) skip_normalization=true ; shift ;;
-H|--n-haplotypes-smooth) n_haps=$2 ; shift 2 ;;
-j|--path-jump-max) max_path_jump=$2 ; shift 2 ;;
Expand All @@ -127,7 +150,7 @@ while true ; do
#-C|--consensus-spec) consensus_spec=$2 ; shift 2 ;;
-Q|--consensus-prefix) consensus_prefix=$2 ; shift 2 ;;
-v|--skip-viz) do_viz=false ; do_layout=false; shift ;;
-S|--do-stats) do_stats=true ; shift ;;
-S|--stats) do_stats=true ; shift ;;
-V|--vcf-spec) vcf_spec=$2 ; shift 2 ;;
-m|--multiqc) multiqc=true ; shift ;;
-o|--output-dir) output_dir=$2 ; shift 2 ;;
Expand All @@ -148,7 +171,7 @@ done
if [ $show_version == true ]; then
SCRIPT_DIR=$( cd -- "$(dirname -- "$(readlink -f "${BASH_SOURCE[0]}" )" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR"
GIT_VERSION=$(git describe --always --tags)
GIT_VERSION=$(git describe --always --tags --long)
echo "pggb $GIT_VERSION"
cd - &> /dev/null
exit
Expand Down Expand Up @@ -356,7 +379,6 @@ if [ ! -e "$temp_dir" ]; then
mkdir "$temp_dir"
temp_dir_was_created=true
fi

# Build the working prefixes inside the temporary directory, keeping only the
# final path component of each original prefix.
prefix_mappings_paf="${temp_dir}/$(basename "${prefix_paf}")"
prefix_seqwish="${temp_dir}/$(basename "${prefix_seqwish}")"
prefix_smoothed="${temp_dir}/$(basename "${prefix_smoothed}")"
Expand Down
5 changes: 2 additions & 3 deletions pggb
Original file line number Diff line number Diff line change
Expand Up @@ -454,9 +454,6 @@ reporting:
multiqc: $multiqc
EOT


echo -e "\nRunning pggb\n" >> "$log_file"

# Check Pangenome Sequence Naming (PanSN)
if [ ! -f "${input_fasta}.fai" ]; then
echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'."
Expand All @@ -470,6 +467,8 @@ cut -f 1 "${input_fasta}.fai" | while read -r line; do
fi
done

echo -e "\nRunning pggb\n" >> "$log_file"

if [[ "$input_paf" == false ]]; then
if [[ ! -s "$prefix_paf".alignments.$mapper.paf || $resume == false ]]; then

Expand Down

0 comments on commit 3c17da0

Please sign in to comment.