Skip to content

Commit

Permalink
Add script for printing intein-free extein sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
mooreryan committed Feb 5, 2023
1 parent 719a9e8 commit 9063ccd
Show file tree
Hide file tree
Showing 12 changed files with 907 additions and 2 deletions.
4 changes: 2 additions & 2 deletions bin/dune
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(executables
(names main)
(public_names InteinFinder)
(names main remove_inteins)
(public_names InteinFinder RemoveInteins)
(instrumentation
(backend bisect_ppx))
(libraries
Expand Down
77 changes: 77 additions & 0 deletions bin/remove_inteins.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
open! Core

(** Command line interface for the RemoveInteins executable. *)
module Cli = struct
  open Cmdliner

  (** Applicative map over Cmdliner terms; enables the [let+] syntax. *)
  let ( let+ ) v f = Term.(const f $ v)

  (** Applicative pairing of two Cmdliner terms; enables the [and+] syntax. *)
  let ( and+ ) v1 v2 = Term.(const (fun x y -> (x, y)) $ v1 $ v2)

  (** Program name passed to [Cmd.info]. *)
  let prog_name = "RemoveInteins"

  (** Parsed command line options. Both fields hold the paths given as the
      two positional arguments. *)
  type opts = {intein_hit_checks: string; queries: string} [@@deriving sexp_of]

  (** Required first positional argument (position 0): path to the hit checks
      file. Parsed with the [non_dir_file] converter. *)
  let intein_hit_checks =
    let doc = "Path to intein_hits_checks file (should exist)" in
    Arg.(
      required
      & pos 0 (some non_dir_file) None
      & info [] ~docv:"HIT_CHECKS" ~doc )

  (** Required second positional argument (position 1): path to the query
      fasta file. Parsed with the [non_dir_file] converter. *)
  let queries =
    let doc = "Path to query fasta file (should exist)" in
    Arg.(
      required & pos 1 (some non_dir_file) None & info [] ~docv:"QUERIES" ~doc )

  (** Term that combines both positional arguments into an [opts] record. *)
  let opts : opts Term.t =
    let+ intein_hit_checks = intein_hit_checks and+ queries = queries in
    {intein_hit_checks; queries}

  (** Renders [opts] as an s-expression string. *)
  let opts_to_string opts = Sexp.to_string @@ [%sexp_of: opts] opts

  (** Command metadata: name, version, one-line doc and man page. An empty
      list is passed for [~exits]. *)
  let info =
    let doc = "remove inteins from extein sequences" in
    let man =
      [ `S Manpage.s_description
      ; `P
          "After you run InteinFinder, you can use this program to generate a \
           set of extein sequences for any inteins that were identified by \
           the pipeline."
      ; `P
          "Eventually, the functionality provided by this program will be \
           included in the main InteinFinder pipeline, but for now, if you \
           need the intein-free extein sequences, use this program."
      ; `P
          "Note that only query sequences with at least one bonafide intein \
           sequence will be printed, and that only inteins who scored an \
           overall Pass will be removed from said extein sequences. Keep in \
           mind that the printed sequences may not be completely intein-free, \
           as a query could have multiple inteins, but not all of those \
           predicted inteins may have scored well enough to be automatically \
           removed. For now, you will see a warning in cases like these." ]
    in
    Cmd.info
      prog_name
      ~version:Lib.Config.Version.intein_finder_version
      ~doc
      ~man
      ~exits:[]

  (** Evaluates the command with [Cmd.eval_value] and returns the parsed
      options. Exits the process with status 0 when help or version output
      was requested, and with status 1 on any evaluation error. *)
  let parse_argv () =
    match Cmd.eval_value @@ Cmd.v info opts with
    | Ok (`Ok opts) ->
        opts
    | Ok `Help | Ok `Version ->
        exit 0
    | Error _ ->
        exit 1
end

(** Program entry point: sets up logging (passing ["debug"] to
    [Logging.set_up_logging]), parses the command line, logs the parsed
    options at debug level, then hands both paths to [Remove_inteins.run]. *)
let main () =
  let open Lib in
  Logging.set_up_logging "debug" ;
  let parsed = Cli.parse_argv () in
  Logs.debug (fun m -> m "%s" (Cli.opts_to_string parsed)) ;
  let {Cli.intein_hit_checks; queries} = parsed in
  Remove_inteins.run ~intein_hit_checks ~queries

let () = main ()
35 changes: 35 additions & 0 deletions lib/coord.ml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ let rec ( <= ) : type idx1 idx2 wrt. (idx1, wrt) t -> (idx2, wrt) t -> bool =
| (Zero_aln _ as x), (One_aln _ as y) ->
x <=* one_to_zero y

(* TODO: arithmetic on different indexing is fine, but different wrt is not.
Currently they both have to be the same. *)

(* Position is closed over addition, so no need to check return values. *)
let add : type idx wrt. (idx, wrt) t -> (idx, wrt) t -> (idx, wrt) t =
fun x y ->
Expand Down Expand Up @@ -317,6 +320,8 @@ let add'' : type idx wrt. int -> (idx, wrt) t -> (idx, wrt) t =
| One_aln x ->
One_aln (x + i)

(* VERY IMPORTANT...DON'T USE SUBTRACTION FOR LENGTH. *)

let sub : type idx wrt. (idx, wrt) t -> (idx, wrt) t -> (idx, wrt) t option =
fun x y ->
match (x, y) with
Expand Down Expand Up @@ -352,6 +357,36 @@ let decr_exn t =
| Some x ->
x

(** [length ?end_is ~start ~end_ ()] is the number of positions spanned by
    [start] and [end_], which must share the same indexing and reference.

    The two coordinates may be given in either order: the smaller one is
    always treated as the beginning of the span.

    - [~end_is:`inclusive] (the default): both endpoints are counted, so the
      result is [|end_ - start| + 1].
    - [~end_is:`exclusive]: one less than the inclusive count. *)
let length :
    type idx wrt.
       ?end_is:[`inclusive | `exclusive]
    -> start:(idx, wrt) t
    -> end_:(idx, wrt) t
    -> unit
    -> int =
  (* Size of the closed interval between two raw integers, whichever of the
     two is larger. *)
  let closed_span a b = if Int.(a > b) then a - b + 1 else b - a + 1 in
  fun ?(end_is = `inclusive) ~start ~end_ () ->
    let inclusive_count =
      match (start, end_) with
      | One_raw a, One_raw b ->
          closed_span a b
      | Zero_raw a, Zero_raw b ->
          closed_span a b
      | One_aln a, One_aln b ->
          closed_span a b
      | Zero_aln a, Zero_aln b ->
          closed_span a b
    in
    match end_is with
    | `inclusive ->
        inclusive_count
    | `exclusive ->
        inclusive_count - 1

(** Maps the Query alignment columns to the raw positions. *)
module Query_aln_to_raw : sig
module Key : sig
Expand Down
1 change: 1 addition & 0 deletions lib/region.ml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module C = Coord

[@@@coverage off]

(** A region record.

    - [start] and [end_] are the bounds of the region, both typed as
      [C.one_raw] coordinates. NOTE(review): presumably both bounds are
      inclusive; confirm against the code that builds regions.
    - [index] is a [Zero_indexed_int.t]. NOTE(review): presumably the ordinal
      of this region within its query; confirm.
    - [query] is the name of the query. *)
type t =
{start: C.one_raw; end_: C.one_raw; index: Zero_indexed_int.t; query: string}
[@@deriving fields, sexp_of]
Expand Down
Loading

0 comments on commit 9063ccd

Please sign in to comment.