forked from arq5x/bedtools-galaxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
intersectBed.xml
155 lines (129 loc) · 9.89 KB
/
intersectBed.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
<tool id="bedtools_intersectbed" name="Intersect interval files" version="2.18.2.0">
<!-- Short tool description; empty in this wrapper. -->
<description/>
<!-- Declares the bedtools2 package, version 2.18.2, as the tool's dependency. -->
<requirements>
    <requirement version="2.18.2" type="package">bedtools2</requirement>
</requirements>
<!-- Command used to report the version of the underlying bedtools binary. -->
<version_command>bedtools --version</version_command>
<!--
    Command template (Cheetah) that assembles the intersectBed call.
      * Input A is passed with -abam when the BAM input type is selected, otherwise with -a;
        input B is always passed with -b.
      * The reporting flags and the overlap-definition flags are only emitted when the
        matching "Show ... options" selector is set to "yes".
      * The split, header and strand values are always substituted; each may be empty.
      * Standard output is redirected into the output dataset.
    Dataset paths are single-quoted so that unusual characters in a path cannot split
    or alter the shell command line.
-->
<command><![CDATA[
    intersectBed
    #if $intype.inselect == "bam"
        -abam '$intype.inputBam' -b '$input' $intype.bed
    #else
        -a '$intype.inputBed' -b '$input'
    #end if
    #if $output_opt.output_opt_select == "yes"
        $output_opt.overlap_mode
        $output_opt.u
        $output_opt.c
        $output_opt.v
    #end if
    #if $overlap_opt.overlap_opt_select == "yes"
        ## NOTE(review): an optional float left blank may stringify as "None" or as an
        ## empty string depending on the Galaxy release (confirm). Skip -f in both
        ## cases so the flag is never emitted without its argument.
        #if str($overlap_opt.f.value).strip() not in ("", "None")
            -f $overlap_opt.f
        #end if
        $overlap_opt.r
    #end if
    $split
    $header
    $strand
    > '$output'
]]></command>
<!--
    Parameters shown on the tool form. Each name below is referenced by the command
    template: intype.* selects and supplies input A, "input" is input B, output_opt.*
    and overlap_opt.* hold optional flags behind a yes/no selector, and split, header
    and strand are top-level flags.
    Literal double quotes inside attribute values are written as &quot; because a raw
    quote would terminate the attribute and make the document ill-formed.
-->
<inputs>
    <!-- Input A: the selector decides which data parameter is shown. -->
    <conditional name="intype">
        <param name="inselect" type="select" label="Select input file type">
            <option value="bed" selected="true">BED, GFF, Interval, VCF</option>
            <option value="bam">BAM</option>
        </param>
        <when value="bed">
            <param name="inputBed" type="data" format="bed,gff,interval,vcf" label="Input A"
                   help="Each feature in A is compared to B in search of overlaps."/>
        </when>
        <when value="bam">
            <param name="inputBam" type="data" format="bam" label="Input A"
                   help="Each BAM alignment in A is compared to B in search of overlaps."/>
            <!-- BAM only: an unchecked box yields the empty string, which the output format rule tests for. -->
            <param name="bed" type="boolean" truevalue="-bed" falsevalue="" checked="false"
                   label="Write output as BED"
                   help="The default is to write output in BAM when using BAM as input."/>
        </when>
    </conditional>

    <!-- Input B. -->
    <param name="input" type="data" format="bed,gff,vcf" label="Input B"/>

    <!-- Optional reporting flags, hidden unless the selector is set to "yes". -->
    <conditional name="output_opt">
        <param name="output_opt_select" type="select" label="Show output options">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <when value="yes">
            <!-- The empty value adds no flag, leaving intersectBed in its default reporting mode. -->
            <param name="overlap_mode" type="select" label="What should be reported for each overlap">
                <option value="">Report the shared interval between the two overlapping features (default)</option>
                <option value="-wa">Write the original entry in A for each overlap.</option>
                <option value="-wb">Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by the fraction- and reciprocal option.</option>
                <option value="-wo">Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. Restricted by the fraction- and reciprocal option.</option>
                <option value="-wao">Write the original A and B entries plus the number of base pairs of overlap between the two features. However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. Restricted by the fraction- and reciprocal option.</option>
                <option value="-loj">Perform a "left outer join". That is, for each feature in A report each overlap with B. If no overlaps are found, report a NULL feature for B.</option>
            </param>
            <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="false"
                   label="Write original A entry once if any overlaps found in B"
                   help="Frequently a feature in &quot;A&quot; will overlap with multiple features in &quot;B&quot;. By default, intersectBed will report each overlap as a separate output line. However, one may want to simply know that there is at least one overlap (or none). When one uses this option, &quot;A&quot; features that overlap with one or more &quot;B&quot; features are reported once. Those that overlap with no &quot;B&quot; features are not reported at all."/>
            <param name="c" type="boolean" truevalue="-c" falsevalue="" checked="false"
                   label="For each entry in A, report the number of hits in B while restricting to the minimum overlap fraction"
                   help="Reports a column after each &quot;A&quot; feature indicating the number (0 or more) of overlapping features found in &quot;B&quot;."/>
            <param name="v" type="boolean" truevalue="-v" falsevalue="" checked="false"
                   label="Only report those entries in A that have no overlap in B"/>
        </when>
        <when value="no"/>
    </conditional>

    <!-- Optional overlap-definition flags, hidden unless the selector is set to "yes". -->
    <conditional name="overlap_opt">
        <param name="overlap_opt_select" type="select" label="Show options for overlap definition">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <when value="yes">
            <!-- Optional: the command template only emits the f flag when a value was entered. -->
            <param name="f" type="float" min="0" max="1" value="" optional="true"
                   label="Minimum overlap required as a fraction of A"
                   help="By default, intersectBed will report an overlap between A and B so long as there is at least one base pair is overlapping. Yet sometimes you may want to restrict reported overlaps between A and B to cases where the feature in B overlaps at least X% (e.g. 50%) of the A feature. This option does exactly this. Default is 1E-9 (i.e. 1bp). For example, to require that the overlap affects 50% of the BAM alignment, use 0.50."/>
            <param name="r" type="boolean" truevalue="-r" falsevalue="" checked="false"
                   label="Require that the fraction of overlap be reciprocal for A and B"
                   help="In other words, if the minimum overlap fraction is 0.90 and this option is used, it requires that B overlap at least 90% of A and that A also overlaps at least 90% of B."/>
        </when>
        <when value="no"/>
    </conditional>

    <!-- Top-level flags; each contributes either its flag or an empty string to the command. -->
    <param name="split" type="boolean" truevalue="-split" falsevalue="" checked="false"
           label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage."
           help="If set, the coverage will be calculated based the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data."/>
    <param name="header" type="boolean" truevalue="-header" falsevalue="" checked="false"
           label="Print the header from the A file prior to results."/>
    <param name="strand" type="select" label="Calculate coverage based on">
        <option value="">Overlaps on either strand</option>
        <option value="-s">Only overlaps occurring on the **same** strand.</option>
        <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
    </param>
</inputs>
<!--
    Single result dataset, labelled after input B and declared as BED.
    The change_format rule switches the datatype to BAM when the value of intype.bed
    is the empty string, i.e. the falsevalue of the "Write output as BED" checkbox that
    is only offered for BAM input.
    NOTE(review): how the rule behaves when intype.bed does not exist (BED input type
    selected) is not visible in this file; presumably the rule simply does not match.
-->
<outputs>
    <data label="Intersection of ${input.name}" format="bed" name="output">
        <change_format>
            <when format="bam" value="" input="intype.bed"/>
        </change_format>
    </data>
</outputs>
<!--
    Help text in reStructuredText. Directives, paragraphs, the transition line and the
    hyperlink targets are separated by blank lines, because reStructuredText needs a
    blank line to end one body element before the next begins. The text is kept at
    column 0 so that it is not interpreted as an indented block.
-->
<help>

.. class:: infomark

Note that each BAM alignment is treated individually. Therefore, if one end of a paired-end alignment overlaps an interval in the BED file,
yet the other end does not, the output file will only include the overlapping end.

.. class:: infomark

Note that a BAM alignment will be sent to the output file **once** even if it overlaps more than one interval in the BED file.

**What it does**

By far, the most common question asked of two sets of genomic features is whether or not any of the
features in the two sets "overlap" with one another. This is known as feature intersection. intersectBed
allows one to screen for overlaps between two sets of genomic features. Moreover, it allows one to have
fine control as to how the intersections are reported. intersectBed works with both BED/GFF
and BAM files as input.

By default, if an overlap is found, intersectBed reports the shared interval between the two
overlapping features.

**Default behavior when using BAM input**

When comparing alignments in BAM format to features in BED format, intersectBed
will, by default, write the output in BAM format. That is, each alignment in the BAM file that meets
the user's criteria will be written in BAM format. This serves as a mechanism to
create subsets of BAM alignments that are of biological interest, etc. Note that only the mate in the BAM
alignment is compared to the BED file. Thus, if only one end of a paired-end sequence overlaps with a
feature in B, then that end will be written to the BAM output. By contrast, the other mate for the
pair will not be written. One should use pairToBed if one wants each BAM alignment
for a pair to be written to BAM output.

**Output BED format when using BAM input**

When comparing alignments in BAM format to features in BED format, intersectBed
will optionally write the output in BED format. That is, each alignment in the BAM file is converted
to a 6 column BED feature and if overlaps are found (or not) based on the user's criteria, the BAM
alignment will be reported in BED format. The BED "name" field is comprised of the RNAME field in
the BAM alignment. If mate information is available, the mate (e.g., "/1" or "/2") field will be
appended to the name. The "score" field is the mapping quality score from the BAM alignment.

------

This tool is part of the `bedtools package`__ from the `Quinlan laboratory`__. If you use this tool, please cite `Quinlan AR, and Hall I.M. BEDTools: A flexible framework for comparing genomic features. Bioinformatics, 2010, 26, 6.`__

.. __: http://code.google.com/p/bedtools/
.. __: http://cphg.virginia.edu/quinlan/
.. __: http://bioinformatics.oxfordjournals.org/content/26/6/841.short

</help>
</tool>