forked from sjackman/fastascripts
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfachain
executable file
·111 lines (95 loc) · 2.13 KB
/
fachain
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/perl
# Merge overlapping sequences
# Copyright 2012 Shaun Jackman
use strict;
use Getopt::Std qw'getopts';
my %opt;
getopts 'k:o:', \%opt;
my $opt_o =
exists $opt{'o'} ? -$opt{'o'} :
exists $opt{'k'} ? -($opt{'k'} - 1) :
undef;
# Reverse complement.
sub rc($) {
my $seq = $_[0];
($seq = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/;
return $seq;
}
my $id;
my %comment;
my %seq;
my %multiplicity;
while (<>) {
chomp;
next if /^#/ || /^$/;
if (/^>/) {
my $comment;
($id, $comment) = split ' ', (substr $_, 1), 2;
die if exists $comment{$id};
if (defined $comment) {
$comment{$id} = " $comment";
my (undef, $multiplicity) = split ' ', $comment;
$multiplicity{$id} = $multiplicity;
}
next;
}
if (/^[ACGTacgtN]/) {
die if exists $seq{$id};
$seq{$id} = $_ if !/N/;
undef $id;
next;
}
last;
}
my $contig_id = 0;
my %seen;
goto first;
while (<>) {
first:
chomp;
next if /^#/ || /^$/;
my @x = split;
my $contig_chain = shift @x;
die unless $contig_chain =~ /[+-]$/;
my $s = chop $contig_chain;
die unless exists $seq{$contig_chain};
#next if $seen{$contig_chain};
$seen{$contig_chain} = 1;
my $contig_seq = $seq{$contig_chain};
$contig_seq = rc $contig_seq if $s eq '-';
my $contig_multiplicity = $multiplicity{$contig_chain};
$contig_chain .= $s;
while (@x > 0) {
my $o = defined $opt_o ? $opt_o : shift @x;
die unless $o < 0;
$o = -$o;
my $id = shift @x;
die unless $id =~ /[+-]$/;
my $s = chop $id;
die unless exists $seq{$id};
my $seq = $seq{$id};
$seq = rc $seq if $s eq '-';
$seen{$id} = 1;
my $l = $contig_seq;
my $r = $seq;
my $ol = substr $l, -$o, $o, '';
my $or = substr $r, 0, $o, '';
die "$ol\n$or" unless $ol eq $or;
#print "$ida$as\t$idb$bs\t-$o\n";
#print ">$ida$as,$idb$bs $o\n$l$ol$r\n";
$contig_seq .= $r;
$contig_chain .= ",$id$s";
$contig_multiplicity += $multiplicity{$id};
}
my $l = length $contig_seq;
print ">$contig_id $l $contig_multiplicity $contig_chain\n",
"$contig_seq\n";
$contig_id++;
}
exit 0;
for my $id (sort {$a <=> $b} keys %seq) {
if (!$seen{$id}) {
print ">$contig_id$comment{$id} $id\n$seq{$id}\n";
$contig_id++;
}
}