Skip to content

Commit

Permalink
Merge pull request #321 from pangenome/pansn_warning
Browse files Browse the repository at this point in the history
check PanSN in the input FASTA
  • Loading branch information
AndreaGuarracino authored Aug 10, 2023
2 parents c50c17b + b36ab2d commit 9a352f6
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 6 deletions.
16 changes: 14 additions & 2 deletions partition-before-pggb
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,12 @@ fi
# Mandatory parameters
# Each variable is compared against the literal string "false"; a match on
# either one raises the help flag and reports that -i and -n are required.
if [[ "$input_fasta" == false || $n_mappings == false ]]; then
show_help=true
>&2 echo "ERROR: mandatory arguments -i and -n"
>&2 echo "[pggb] ERROR: mandatory arguments -i and -n"
fi

# Arithmetic comparison: the mapping count must be at least 2.
# NOTE(review): a non-numeric value (such as the "false" placeholder tested
# above) appears to be evaluated by bash arithmetic as a variable name rather
# than rejected, so this branch may fire as well in that case — confirm.
if (( "$n_mappings" < 2 )); then
show_help=true
>&2 echo "ERROR: -n must be greater than or equal to 2"
>&2 echo "[pggb] ERROR: -n must be greater than or equal to 2"
fi

if [ $show_help == true ]; then
Expand Down Expand Up @@ -432,6 +432,18 @@ reporting:
multiqc: $multiqc
EOT

# Check Pangenome Sequence Naming (PanSN)
# Sequence names are taken from the first tab-separated column of the FASTA
# index, so the index has to exist before the check can run.
if [[ ! -f "${input_fasta}.fai" ]]; then
  # Diagnostics go to stderr, like the other "[pggb] ERROR" messages.
  >&2 echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'."
  exit 1
fi
# A name passes when it consists of two or more non-empty fields joined by '#'.
# Only the first offending name is reported; scanning stops right after it.
# The loop reads the index through a redirection (not a pipeline) so that it
# runs in the current shell and warning_emitted keeps its value afterwards.
warning_emitted=0
while IFS=$'\t' read -r seq_name _ || [[ -n "$seq_name" ]]; do
  if [[ ! $seq_name =~ ^([^#]+#)+[^#]+$ ]]; then
    >&2 echo "[pggb] Warning: there are sequence names (like '$seq_name') that do not match the Pangenome Sequence Naming (PanSN)."
    warning_emitted=1
    break
  fi
done < "${input_fasta}.fai"

#-------------------------------------------------------------------------------
echo -e "\nRunning partitioning\n" >> "$log_file"
Expand Down
22 changes: 18 additions & 4 deletions pggb
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ parse_numeric() {
echo $value
return 0
else
echo "ERROR: Invalid input" >&2
echo "[pggb] ERROR: Invalid input" >&2
return 1
fi
fi
Expand All @@ -106,7 +106,7 @@ parse_numeric() {
M|m) value=$(echo "$value * 1000000" | bc) ;;
G|g) value=$(echo "$value * 1000000000" | bc) ;;
T|t) value=$(echo "$value * 1000000000000" | bc) ;;
*) echo "ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;;
*) echo "[pggb] ERROR: Invalid suffix or unsupported suffix. Supported metric suffixes are k, K, m, M, g, G, t, T." >&2; return 1 ;;
esac
printf "%.0f" $value
}
Expand Down Expand Up @@ -180,12 +180,12 @@ fi
# Mandatory parameters
# Each variable is compared against the literal string "false"; a match on
# either one raises the help flag and reports that -i and -n are required.
if [[ "$input_fasta" == false || $n_mappings == false ]]; then
show_help=true
>&2 echo "ERROR: mandatory arguments -i and -n"
>&2 echo "[pggb] ERROR: mandatory arguments -i and -n"
fi

# Arithmetic comparison: the mapping count must be at least 2.
# NOTE(review): a non-numeric value (such as the "false" placeholder tested
# above) appears to be evaluated by bash arithmetic as a variable name rather
# than rejected, so this branch may fire as well in that case — confirm.
if (( "$n_mappings" < 2 )); then
show_help=true
>&2 echo "ERROR: -n must be greater than or equal to 2"
>&2 echo "[pggb] ERROR: -n must be greater than or equal to 2"
fi

if [ $show_help == true ]; then
Expand Down Expand Up @@ -456,6 +456,20 @@ EOT


echo -e "\nRunning pggb\n" >> "$log_file"

# Check Pangenome Sequence Naming (PanSN)
# Sequence names are taken from the first tab-separated column of the FASTA
# index, so the index has to exist before the check can run.
if [[ ! -f "${input_fasta}.fai" ]]; then
  # Diagnostics go to stderr, like the other "[pggb] ERROR" messages.
  >&2 echo "[pggb] ERROR: Index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'."
  exit 1
fi
# A name passes when it consists of two or more non-empty fields joined by '#'.
# Only the first offending name is reported; scanning stops right after it.
# The loop reads the index through a redirection (not a pipeline) so that it
# runs in the current shell and warning_emitted keeps its value afterwards.
warning_emitted=0
while IFS=$'\t' read -r seq_name _ || [[ -n "$seq_name" ]]; do
  if [[ ! $seq_name =~ ^([^#]+#)+[^#]+$ ]]; then
    >&2 echo "[pggb] Warning: there are sequence names (like '$seq_name') that do not match the Pangenome Sequence Naming (PanSN)."
    warning_emitted=1
    break
  fi
done < "${input_fasta}.fai"

if [[ "$input_paf" == false ]]; then
if [[ ! -s "$prefix_paf".alignments.$mapper.paf || $resume == false ]]; then

Expand Down

0 comments on commit 9a352f6

Please sign in to comment.