From 2f85ca65e464d74e689e4c2f45e7a120dacea900 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Mon, 29 Jan 2024 17:04:25 +0000 Subject: [PATCH] Document CRAM FP is delta-encoded (PR#754) Fixes #747 --- CRAMv3.tex | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CRAMv3.tex b/CRAMv3.tex index f792ed3db..5ad35d8d1 100644 --- a/CRAMv3.tex +++ b/CRAMv3.tex @@ -803,7 +803,7 @@ \subsubsection*{Data series encodings} FC & encoding\texttt{<}byte\texttt{>} & read features codes & see separate section\tabularnewline \hline FP & encoding\texttt{<}int\texttt{>} & in-read positions & positions of the read -features\tabularnewline +features; a positive delta to the last position (starting with zero)\tabularnewline \hline DL & encoding\texttt{<}int\texttt{>} & deletion lengths & base-pair deletion lengths\tabularnewline \hline @@ -1571,6 +1571,9 @@ \subsubsection*{\textbf{Read feature records}} Read features are used to store read details that are expressed using read coordinates (e.g. base differences respective to the reference sequence). The read feature records start with the number of read features followed by the read features themselves. +Each read feature has the position encoded as the distance since the +last feature position, or the absolute position (i.e. delta vs zero) +for the first feature. Finally the single mapping quality and per-base quality scores are stored. \begin{threeparttable}[t] @@ -1580,7 +1583,7 @@ \subsubsection*{\textbf{Read feature records}} \hline int & FN & number of read features & the number of read features\tabularnewline \hline -int & FP & in-read-position\tnote{a} & position of the read feature\tabularnewline +int & FP & in-read-position\tnote{a} & delta-position of the read feature\tabularnewline \hline byte & FC & read feature code\tnote{a} & See feature codes below\tabularnewline \hline @@ -1735,6 +1738,7 @@ \subsubsection*{Decode mapped read pseudocode} \begin{algorithmic}[1] \Procedure{DecodeMappedRead}{} \State $feature\_number\gets$ \Call{ReadItem}{FN, Integer} + \State $last\_feature\_position\gets 0$ \For{$i\gets 1 \algorithmicto feature\_number$} \State \Call{DecodeFeature}{} \EndFor @@ -1749,7 +1753,8 @@ \subsubsection*{Decode mapped read pseudocode} \Procedure{DecodeFeature}{} \settowidth{\maxwidth}{feature\_position\ } \State \algalign{feature\_code}{\gets} \Call{ReadItem}{FC, Integer} - \State \algalign{feature\_position}{\gets} \Call{ReadItem}{FP, Integer} + \State \algalign{feature\_position}{\gets} \Call{ReadItem}{FP, Integer} $+\ last\_feature\_position$ + \State $last\_feature\_position\gets feature\_position$ \settowidth{\maxwidth}{substitution\_code\ } \If{$feature\_code = $`B'} \State \algalign{base}{\gets} \Call{ReadItem}{BA, Byte}