forked from ChiLiubio/microeco_tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmicroeco-tutorial.tex
3852 lines (3242 loc) · 215 KB
/
microeco-tutorial.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Options for packages loaded elsewhere: they are recorded here and take
% effect whenever hyperref / url are loaded later in the preamble.
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
%
\documentclass[
]{book}
% Math support
\usepackage{amsmath,amssymb}
% Latin Modern fonts
\usepackage{lmodern}
% Engine detection: provides the \ifxetex and \ifluatex conditionals
\usepackage{ifxetex,ifluatex}
% The number tested below starts as 0 and gets a digit 1 appended for each
% engine conditional that is true, so it equals 0 only when neither XeTeX
% nor LuaTeX is running.
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% microtype is optional as well: the empty last argument means that nothing
% is done when microtype.sty cannot be found.
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
% Paragraph layout: no indentation, vertical space between paragraphs.
% \makeatletter is required because \@ifundefined has an @ in its name.
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
% Fallback when parskip.sty is not installed: set the two lengths by hand.
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
\usepackage{xcolor}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
% Load bookmark when it is installed; otherwise load hyperref directly.
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
% PDF metadata and link appearance. pdftitle and pdfauthor repeat the strings
% given to \title and \author further down in this preamble.
\hypersetup{
pdftitle={Tutorial for R microeco package (v0.7.0)},
pdfauthor={Chi Liu, Felipe R. P. Mansoldo, Umer Zeeshan Ijaz, Chenhao Li, Yang Cao, Jarrod J. Scott, Yaoming Cui, Alane B. Vermelho, Minjie Yao, Xiangzhen Li},
hidelinks,
pdfcreator={LaTeX via pandoc}}
\urlstyle{same} % disable monospaced font for URLs
% ---- Syntax highlighting support for the code listings in the body ----
\usepackage{color}
\usepackage{fancyvrb}
% \VerbBar expands to a literal vertical bar.
\newcommand{\VerbBar}{|}
% \VERB: inline \Verb in which backslash, { and } keep their command meaning.
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
% Highlighting: a Verbatim environment in which backslash, { and } stay
% active, so the \...Tok macros defined below can be used inside listings.
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\usepackage{framed}
% Background colour of code blocks (light grey) and the Shaded environment
% that wraps each listing in the snugshade box from the framed package.
\definecolor{shadecolor}{RGB}{248,248,248}
\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
% Token macros: each takes the token text as its single argument and applies
% a colour and/or font style. Macros whose body is just #1 leave the text
% unstyled.
\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\BuiltInTok}[1]{#1}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
\newcommand{\ExtensionTok}[1]{#1}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ImportTok}[1]{#1}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\NormalTok}[1]{#1}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\RegionMarkerTok}[1]{#1}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
% Table support: multi-page tables, rules (\toprule etc.) and column
% preprocessing (>{...} specifiers), all of which the tables in the body use.
\usepackage{longtable,booktabs,array}
\usepackage{calc} % for calculating minipage widths
% Correct order of tables after \paragraph or \subparagraph
\usepackage{etoolbox}
\makeatletter
% Replace the first \par inside \longtable so that an empty box is inserted
% beforehand whenever \if@noskipsec is true; the two empty arguments mean that
% nothing extra is done on patch success or failure.
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
\makeatother
% Allow footnotes in longtable head/foot
% (footnotehyper is used when installed, footnote otherwise)
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
\makesavenoteenv{longtable}
\usepackage{graphicx}
\makeatletter
% \maxwidth: the natural width of the image, capped at \linewidth.
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
% \maxheight: the natural height of the image, capped at \textheight.
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
\setlength{\emergencystretch}{3em} % prevent overfull lines
% \tightlist removes the extra vertical space between list items; it is
% invoked at the start of compact lists in the body. \providecommand only
% defines it if no definition exists yet.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
% Number sectioning units down to level 5.
\setcounter{secnumdepth}{5}
% booktabs is already loaded together with longtable above; loading it again
% is harmless.
\usepackage{booktabs}
\usepackage{amsthm}
% Redefine the internal hook \thm@space@setup so that the skip before a
% theorem-like environment is 8pt (stretch 2pt, shrink 4pt) and the skip
% after it is identical.
\makeatletter
\def\thm@space@setup{%
\thm@preskip=8pt plus 2pt minus 4pt
\thm@postskip=\thm@preskip
}
\makeatother
% selnolig is loaded under LuaTeX only.
\ifluatex
\usepackage{selnolig} % disable illegal ligatures
\fi
% Citations: natbib provides the \citep command used in the body; references
% are formatted with the apalike style.
\usepackage[]{natbib}
\bibliographystyle{apalike}
% Title-page data printed by \maketitle. The title and author strings also
% appear in \hypersetup above as the PDF metadata.
\title{Tutorial for R microeco package (v0.7.0)}
\author{Chi Liu, Felipe R. P. Mansoldo, Umer Zeeshan Ijaz, Chenhao Li, Yang Cao, Jarrod J. Scott, Yaoming Cui, Alane B. Vermelho, Minjie Yao, Xiangzhen Li}
\date{2022-03-23}
\begin{document}
\maketitle
{
\setcounter{tocdepth}{1}
\tableofcontents
}
\hypertarget{background}{%
\chapter{Background}\label{background}}
R language \citep{R-base} and its packages ecosystem are wonderful tools for data analysis.
In the field of microbial community ecology, many packages can be used for the data analysis,
such as vegan \citep{Jari_vegan_2019}, ape \citep{Paradis_ape_2018} and picante \citep{Picante_Kembel_2010}.
However, with the development of the high-throughput sequencing techniques,
the increasing data amount and complexity make the data mining a challenge.
There have been some R packages created for the statistics and visualization in microbial community ecology,
such as phyloseq \citep{Mcmurdie_phyloseq_2013},
microbiome (\url{https://github.com/microbiome/microbiome}), microbiomeSeq (\url{http://www.github.com/umerijaz/microbiomeSeq}),
ampvis2 (\url{https://madsalbertsen.github.io/ampvis2/reference/index.html}), MicrobiomeR (\url{https://github.com/vallenderlab/MicrobiomeR}),
theseus \citep{Price_theseus_2018}, rANOMALY \citep{Theil_rANOMALY_2021},
MicrobiomeExplorer \citep{Reeder_MicrobiomeExplorer_2021},
tidyMicro \citep{Carpenter_tidyMicro_2021}, microbial (\url{https://github.com/guokai8/microbial}),
and so on.
In addition, some web tools associated with R language are also useful for microbial community data analysis,
such as Shiny-phyloseq \citep{McMurdie_Shiny_2015}, Mian \citep{Jin_Mian_2021} and animalcules \citep{Zhao_animalcules_2021}.
However, users still lack a flexible, comprehensive and modularized R package to analyze and manage the data easily.
So we created the microeco R package \citep{Liu_microeco_2021} for this goal (\url{https://github.com/ChiLiubio/microeco}).
In addition, we developed the file2meco package (\url{https://github.com/ChiLiubio/file2meco}) to make it easy to import data from some widely used tools,
and the mecodev package (\url{https://github.com/ChiLiubio/mecodev}) for some extended approaches in this field.
\hypertarget{intro}{%
\chapter{Introduction}\label{intro}}
The microeco package has several advantages compared to other packages in R.
The main goal of developing this package is to help users analyse community ecology data quickly.
So a series of commonly-used and cutting-edge approaches are implemented.
To facilitate the data mining, the whole structure of the microeco package is highly modularized so that
users can conveniently remember, search and use it.
It is notable that, besides the demonstrations in the tutorial, users can also save the intermediate files in each object and
apply those files to other tools according to the format requirement.
Main files stored in the object of each class are the frequently-used data.frame format.
So the intermediate and result files are easily saved, modified and used for other tools in microbial ecology.
Before starting the specific usage of each class, let's first learn several key points.
\hypertarget{r6-class}{%
\section{R6 Class}\label{r6-class}}
All the classes in microeco package depend on the R6 class \citep{R6_Winston}.
R6 uses the encapsulated object-oriented programming paradigm,
which means that R6 is a profoundly different OO system from S3 and S4 because it is built on encapsulated objects, rather than generic functions.
If you are interested in the class features, read more in the `Advanced R' book (\url{https://adv-r.hadley.nz/}).
\begin{itemize}
\item
A generic is a regular function, so it lives in the global namespace. An R6 method belongs to an object so it lives in a local namespace.
This influences how we think about naming. The methods belong to objects, not generics, and you can call them like object\$method().
\item
R6's reference semantics allow methods to simultaneously return a value and modify an object.
\item
Every R6 object has an S3 class that reflects its hierarchy of R6 class.
\end{itemize}
\hypertarget{help}{%
\section{Help}\label{help}}
The use of help documents in the microeco package may be a little different from that of other packages you often use.
If you wish to see the help document of a function, you should search for the name of the class (not the name of the function)
and click the link of that function.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# first install microeco, see https://github.com/ChiLiubio/microeco}
\CommentTok{\# load package microeco}
\FunctionTok{library}\NormalTok{(microeco)}
\end{Highlighting}
\end{Shaded}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# this can show all the functions and the detailed descriptions in the microtable class}
\CommentTok{\# same with: help(microtable)}
\NormalTok{?microtable}
\end{Highlighting}
\end{Shaded}
\hypertarget{dependence}{%
\section{Dependence}\label{dependence}}
\hypertarget{important-packages}{%
\subsection{Important packages}\label{important-packages}}
To keep the start and use of microeco package simplified,
the installation of microeco only depends on several packages, which are mandatorily installed from CRAN and frequently used in data analysis.
As a consequence, you may encounter an error like the following when using a class or function that invokes an additional package:
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{library}\NormalTok{(microeco)}
\FunctionTok{data}\NormalTok{(dataset)}
\NormalTok{t1 }\OtherTok{\textless{}{-}}\NormalTok{ trans\_network}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{dataset =}\NormalTok{ dataset, }\AttributeTok{cal\_cor =} \ConstantTok{NA}\NormalTok{, }\AttributeTok{taxa\_level =} \StringTok{"OTU"}\NormalTok{, }\AttributeTok{filter\_thres =} \FloatTok{0.0005}\NormalTok{)}
\NormalTok{t1}\SpecialCharTok{$}\FunctionTok{cal\_network}\NormalTok{(}\AttributeTok{network\_method =} \StringTok{"SpiecEasi"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{Error in t1$cal\_network(network\_method = "SpiecEasi"): igraph package not installed ...}
\end{Highlighting}
\end{Shaded}
The reason is that the network construction requires the igraph package. We do not put igraph and some other packages in the ``Imports'' part of the microeco package.
In addition, some packages, e.g.~SpiecEasi, are released on GitHub and cannot be installed automatically.
The solutions:
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
Install the package when you encounter such an error. Actually, it's very easy to install the packages from CRAN, Bioconductor or GitHub. Just try it.
\item
Install the packages in advance. This is recommended if the user is interested in most of the methods in the microeco package and wants to run a large number of examples in the tutorial.
\end{enumerate}
We show several packages that are published in CRAN and not installed automatically.
\begin{tabular}{l|l|l}
\hline
Package & where & description\\
\hline
MASS & trans\_diff class & linear discriminant analysis\\
\hline
GUniFrac & cal\_betadiv() & UniFrac distance matrix\\
\hline
ggpubr & plot\_alpha() & some plotting functions\\
\hline
randomForest & trans\_diff class & random forest analysis\\
\hline
ggdendro & plot\_clustering() & plotting clustering dendrogram\\
\hline
ggrepel & trans\_rda class & reduce the text overlap in the plot\\
\hline
agricolae & cal\_diff() & multiple comparisons in anova\\
\hline
gridExtra & trans\_diff class & merge plots\\
\hline
picante & cal\_alphadiv() & Faith’s phylogenetic alpha diversity\\
\hline
pheatmap & plot\_corr(pheatmap = TRUE) & correlation heatmap with clustering dendrogram\\
\hline
tidytree & trans\_diff class & plot the taxonomic tree\\
\hline
igraph & trans\_network class & network related operations\\
\hline
rgexf & save\_network() & save network with gexf style\\
\hline
ggalluvial & plot\_bar(use\_alluvium = TRUE) & alluvial plot\\
\hline
\end{tabular}
Then, if you want to install these packages or some of them, you can do it like this:
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# If a package is not installed, it will be installed from CRAN.}
\CommentTok{\# First select the packages of interest}
\NormalTok{packages }\OtherTok{\textless{}{-}} \FunctionTok{c}\NormalTok{(}\StringTok{"MASS"}\NormalTok{, }\StringTok{"GUniFrac"}\NormalTok{, }\StringTok{"ggpubr"}\NormalTok{, }\StringTok{"randomForest"}\NormalTok{, }\StringTok{"ggdendro"}\NormalTok{, }\StringTok{"ggrepel"}\NormalTok{, }\StringTok{"agricolae"}\NormalTok{, }\StringTok{"gridExtra"}\NormalTok{, }\StringTok{"picante"}\NormalTok{, }\StringTok{"pheatmap"}\NormalTok{, }\StringTok{"igraph"}\NormalTok{, }\StringTok{"rgexf"}\NormalTok{, }\StringTok{"ggalluvial"}\NormalTok{, }\StringTok{"ggh4x"}\NormalTok{)}
\CommentTok{\# Now check or install}
\ControlFlowTok{for}\NormalTok{(x }\ControlFlowTok{in}\NormalTok{ packages)\{}
\ControlFlowTok{if}\NormalTok{(}\SpecialCharTok{!}\FunctionTok{require}\NormalTok{(x, }\AttributeTok{character.only =} \ConstantTok{TRUE}\NormalTok{)) \{}
\FunctionTok{install.packages}\NormalTok{(x, }\AttributeTok{dependencies =} \ConstantTok{TRUE}\NormalTok{)}
\NormalTok{ \}}
\NormalTok{\}}
\end{Highlighting}
\end{Shaded}
There are also some packages that are useful in some functions. These packages may be R packages published on Bioconductor or GitHub,
or tools written in other languages.
\hypertarget{ggtree}{%
\subsection{ggtree}\label{ggtree}}
Plotting the cladogram from LEfSe result requires the ggtree package in bioconductor (\url{https://bioconductor.org/packages/release/bioc/html/ggtree.html}).
\begin{Shaded}
\begin{Highlighting}[]
\ControlFlowTok{if}\NormalTok{ (}\SpecialCharTok{!}\FunctionTok{requireNamespace}\NormalTok{(}\StringTok{"BiocManager"}\NormalTok{, }\AttributeTok{quietly =} \ConstantTok{TRUE}\NormalTok{)) }\FunctionTok{install.packages}\NormalTok{(}\StringTok{"BiocManager"}\NormalTok{)}
\NormalTok{BiocManager}\SpecialCharTok{::}\FunctionTok{install}\NormalTok{(}\StringTok{"ggtree"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\hypertarget{spieceasi}{%
\subsection{SpiecEasi}\label{spieceasi}}
The R package SpiecEasi can be used for the network construction using SPIEC-EASI (SParse InversE Covariance Estimation for Ecological Association Inference) approach.
The package can be installed from Github \url{https://github.com/zdk123/SpiecEasi}
\hypertarget{gephi}{%
\subsection{Gephi}\label{gephi}}
Gephi is an excellent network visualization tool and is used to open the saved network file,
i.e.~network.gexf in the \href{https://chiliubio.github.io/microeco_tutorial/model-based-class.html\#trans_network-class}{tutorial}.
You can download Gephi and learn how to use it from \url{https://gephi.org/users/download/}
\hypertarget{wgcna}{%
\subsection{WGCNA}\label{wgcna}}
In the correlation-based network, when the species number is very large,
the correlation algorithm in WGCNA is very fast compared to the `cor' option in trans\_network.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{install.packages}\NormalTok{(}\StringTok{"WGCNA"}\NormalTok{, }\AttributeTok{dependencies =} \ConstantTok{TRUE}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\hypertarget{tax4fun}{%
\subsection{Tax4Fun}\label{tax4fun}}
Tax4Fun is an R package used for the prediction of functional potential of prokaryotic communities.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
install Tax4Fun package
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{install.packages}\NormalTok{(}\StringTok{"RJSONIO"}\NormalTok{)}
\FunctionTok{install.packages}\NormalTok{(}\FunctionTok{system.file}\NormalTok{(}\StringTok{"extdata"}\NormalTok{, }\StringTok{"biom\_0.3.12.tar.gz"}\NormalTok{, }\AttributeTok{package=}\StringTok{"microeco"}\NormalTok{), }\AttributeTok{repos =} \ConstantTok{NULL}\NormalTok{, }\AttributeTok{type =} \StringTok{"source"}\NormalTok{)}
\FunctionTok{install.packages}\NormalTok{(}\FunctionTok{system.file}\NormalTok{(}\StringTok{"extdata"}\NormalTok{, }\StringTok{"qiimer\_0.9.4.tar.gz"}\NormalTok{, }\AttributeTok{package=}\StringTok{"microeco"}\NormalTok{), }\AttributeTok{repos =} \ConstantTok{NULL}\NormalTok{, }\AttributeTok{type =} \StringTok{"source"}\NormalTok{)}
\FunctionTok{install.packages}\NormalTok{(}\FunctionTok{system.file}\NormalTok{(}\StringTok{"extdata"}\NormalTok{, }\StringTok{"Tax4Fun\_0.3.1.tar.gz"}\NormalTok{, }\AttributeTok{package=}\StringTok{"microeco"}\NormalTok{), }\AttributeTok{repos =} \ConstantTok{NULL}\NormalTok{, }\AttributeTok{type =} \StringTok{"source"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
download SILVA123 reference data from \url{http://tax4fun.gobics.de/}\\
unzip SILVA123.zip and provide this path to the folderReferenceData parameter of cal\_tax4fun function in trans\_func class.
\end{enumerate}
\hypertarget{tax4fun2}{%
\subsection{Tax4Fun2}\label{tax4fun2}}
Tax4Fun2 is another R package for the prediction of functional profiles and functional gene redundancies of prokaryotic communities \citep{Wemheuer_Tax4Fun2_2020}.
It has higher accuracies than PICRUSt and Tax4Fun. The Tax4Fun2 approach implemented in microeco is a little different from the original package.
Using the Tax4Fun2 approach requires the representative fasta file.
The user does not need to install the Tax4Fun2 R package.
The only thing that needs to be done is to download the blast tool (\textbf{ignore this if the blast tool is already in the path}) and the Ref99NR/Ref100NR database (select one).
Download blast tools from ``\url{ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+}''; e.g.~ncbi-blast-****-x64-win64.tar.gz for the Windows system.
Note that some errors can come from the latest versions because of a memory issue (\url{https://www.biostars.org/p/413294/}).
An easy solution is to use a previous version (such as 2.5.0).
Download Ref99NR.zip from ``\url{https://cloudstor.aarnet.edu.au/plus/s/DkoZIyZpMNbrzSw/download}'' or Ref100NR.zip from ``\url{https://cloudstor.aarnet.edu.au/plus/s/jIByczak9ZAFUB4/download}''.
Uncompress all the folders. The final folders should be like these structures:
blast tools:\\
\textbar-- ncbi-blast-2.5.0+\\
\textbar---- bin\\
\textbar------ blastn.exe\\
\textbar------ makeblastdb.exe\\
\textbar------ \ldots\ldots{}
Ref99NR:\\
\textbar-- Tax4Fun2\_ReferenceData\_v2\\
\textbar---- Ref99NR\\
\textbar------ otu000001.tbl.gz\\
\textbar------ \ldots\ldots{}\\
\textbar------ Ref99NR.fasta\\
\textbar------ Ref99NR.tre
The path ``Tax4Fun2\_ReferenceData\_v2'' will be required in the trans\_func\$cal\_tax4fun2() function.
The blast tool path ``ncbi-blast-2.5.0+/bin'' is also required if it is not added to the path (environmental variable).
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# Either seqinr or Biostrings package should be installed for reading and writing fasta file}
\FunctionTok{install.packages}\NormalTok{(}\StringTok{"seqinr"}\NormalTok{, }\AttributeTok{dependencies =} \ConstantTok{TRUE}\NormalTok{)}
\CommentTok{\# or install Biostrings from bioconductor https://bioconductor.org/packages/release/bioc/html/Biostrings.html}
\CommentTok{\# Now we show how to read the fasta file}
\CommentTok{\# see https://github.com/ChiLiubio/file2meco if you do not have installed file2meco}
\NormalTok{rep\_fasta\_path }\OtherTok{\textless{}{-}} \FunctionTok{system.file}\NormalTok{(}\StringTok{"extdata"}\NormalTok{, }\StringTok{"rep.fna"}\NormalTok{, }\AttributeTok{package=}\StringTok{"file2meco"}\NormalTok{)}
\NormalTok{rep\_fasta }\OtherTok{\textless{}{-}}\NormalTok{ seqinr}\SpecialCharTok{::}\FunctionTok{read.fasta}\NormalTok{(rep\_fasta\_path)}
\CommentTok{\# or use Biostrings::readDNAStringSet(rep\_fasta\_path)}
\CommentTok{\# then see the help document of microtable class about the rep\_fasta in microtable$new().}
\end{Highlighting}
\end{Shaded}
\hypertarget{plotting}{%
\section{Plotting}\label{plotting}}
Most of the plotting in the package relies on the ggplot2 package system.
We provide some parameters to change the corresponding plot, but it may be far from enough.
The user can also assign the output a name and use the ggplot2-style grammar to modify it.
Each data table used for plotting is stored in the object and can be saved for the customized analysis and plotting.
Of course, the user can also directly modify the class and reload them to use.
Any contribution of a modified class is also appreciated via github pushing or email (\href{mailto:[email protected]}{\nolinkurl{[email protected]}}).
\hypertarget{basic-class}{%
\chapter{Basic class}\label{basic-class}}
The microtable class is the basic class.
All the other classes depend on the microtable class.
\hypertarget{microtable-class}{%
\section{microtable class}\label{microtable-class}}
Many tools can be used for the bioinformatic analysis of amplicon sequencing data, such as QIIME \citep{Caporaso_QIIME_2010}, QIIME2 \citep{Bolyen_Reproducible_2019},
usearch (\url{https://www.drive5.com/usearch/}), mothur \citep{Schloss_Introducing_2009},
SILVAngs (\url{https://ngs.arb-silva.de/silvangs/}),
and RDP (\url{http://rdp.cme.msu.edu/}).
Although the formats of results may be distinctive across various tools, the main files can be generally classified into the following parts:
(1) OTU/ASV table, i.e.~the species-sample abundance table;
(2) taxonomy table, the taxonomic assignment table;
(3) representative sequences;
(4) phylogenetic tree;
(5) metadata. It is generally useful to create a detailed sample information table to store all the sample information,
including the environmental data.
The microtable class is the basic class and designed to store the basic data for all the downstream analysis in the microeco package.
At least, the OTU table (i.e.~species-sample abundance table) should be provided for creating microtable object.
Thus, the microtable class can recognize that the sample information table is missing and create a default sample table according to
the sample names in otu\_table.
To make the file reading more convenient,
we also build another R package file2meco (\url{https://github.com/ChiLiubio/file2meco}) to read the output files of some tools into microtable object.
Currently, those tools/software include not only the commonly-used QIIME \citep{Caporaso_QIIME_2010} and QIIME2 \citep{Bolyen_Reproducible_2019},
but also some metagenomic tools, such as HUMAnN \citep{Franzosa_Species_2018} and kraken2 \citep{Wood_Improved_2019}.
In this tutorial, we use the data inside the package microeco to show some basic operations.
\hypertarget{example}{%
\subsection{Example}\label{example}}
The 16S rRNA gene sequencing results in the example data of the package are used to show the main part of the tutorial.
This dataset is the 16S rRNA gene Miseq sequencing results of wetland soils in China published by An et al. \citep{An_Soil_2019},
who surveyed soil bacterial communities in Chinese inland wetlands (IW),
coastal wetland (CW) and Tibet plateau wetlands (TW) using amplicon sequencing.
These wetlands include both saline and non-saline samples.
The sample information table has 4 columns: ``SampleID'', ``Group'', ``Type'' and ``Saline''.
The column ``SampleID'' is the same as the rownames.
The column ``Group'' represents the IW, CW and TW.
The column ``Type'' represents the sampling region: northeastern region (NE), northwest region (NW), North China area (NC),
middle-lower reaches of the Yangtze River (YML), southern coastal area (SC), upper reaches of the Yangtze River (YU), Qinghai-Tibet Plateau (QTP).
The column ``Saline'' represents the saline soils and non-saline soils.
In this dataset, the environmental factor table is separated from the sample information table.
It is of course doable to put all the environmental data into sample information table.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{library}\NormalTok{(microeco)}
\CommentTok{\# load the example data; 16S rRNA gene amplicon sequencing dataset}
\FunctionTok{data}\NormalTok{(sample\_info\_16S)}
\FunctionTok{data}\NormalTok{(otu\_table\_16S)}
\FunctionTok{data}\NormalTok{(taxonomy\_table\_16S)}
\CommentTok{\# use phylogenetic tree to calculate phylogeny{-}based alpha and beta metrics}
\FunctionTok{data}\NormalTok{(phylo\_tree\_16S)}
\CommentTok{\# load the environmental data which is detached from sample table}
\FunctionTok{data}\NormalTok{(env\_data\_16S)}
\CommentTok{\# use pipe operator in magrittr package}
\FunctionTok{library}\NormalTok{(magrittr)}
\CommentTok{\# set.seed is used to fix the random number generation to make the results repeatable}
\FunctionTok{set.seed}\NormalTok{(}\DecValTok{123}\NormalTok{)}
\CommentTok{\# make the plotting background same with the tutorial}
\FunctionTok{library}\NormalTok{(ggplot2)}
\FunctionTok{theme\_set}\NormalTok{(}\FunctionTok{theme\_bw}\NormalTok{())}
\end{Highlighting}
\end{Shaded}
Make sure that the data types of sample\_table, otu\_table and tax\_table are all data.frame as the following part shows.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(otu\_table\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "data.frame"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{otu\_table\_16S[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{, }\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{]}
\end{Highlighting}
\end{Shaded}
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.21}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.07}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.07}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.07}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.07}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.08}}@{}}
\toprule
~ & S1 & S2 & S3 & S4 & S5 \\
\midrule
\endhead
\textbf{OTU\_4272} & 1 & 0 & 1 & 1 & 0 \\
\textbf{OTU\_236} & 1 & 4 & 0 & 2 & 35 \\
\textbf{OTU\_399} & 9 & 2 & 2 & 4 & 4 \\
\textbf{OTU\_1556} & 5 & 18 & 7 & 3 & 2 \\
\textbf{OTU\_32} & 83 & 9 & 19 & 8 & 102 \\
\bottomrule
\end{longtable}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(taxonomy\_table\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "data.frame"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{taxonomy\_table\_16S[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{, }\DecValTok{1}\SpecialCharTok{:}\DecValTok{3}\NormalTok{]}
\end{Highlighting}
\end{Shaded}
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}
>{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.18}}
>{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}}
>{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.32}}@{}}
\toprule
~ & Kingdom & Phylum & Class \\
\midrule
\endhead
\textbf{OTU\_4272} & k\_\_Bacteria & p\_\_Firmicutes & c\_\_Bacilli \\
\textbf{OTU\_236} & k\_\_Bacteria & p\_\_Chloroflexi & c\_\_ \\
\textbf{OTU\_399} & k\_\_Bacteria & p\_\_Proteobacteria & c\_\_Betaproteobacteria \\
\textbf{OTU\_1556} & k\_\_Bacteria & p\_\_Acidobacteria & c\_\_Acidobacteria \\
\textbf{OTU\_32} & k\_\_Archaea & p\_\_Miscellaneous
Crenarchaeotic Group & c\_\_ \\
\bottomrule
\end{longtable}
Generally, users' taxonomic table may have some chaotic information, such as NA, unidentified and unknown.
This information can potentially influence the following taxonomic abundance calculation and other taxonomy-based analyses.
So it is usually necessary to clean this data using the \textbf{tidy\_taxonomy} function.
Another very important result of this operation is to \textbf{unify the taxonomic prefix} automatically,
e.g.~transforming D\_1\_\_ to p\_\_ for phylum level or adding p\_\_ to phylum directly.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# make the taxonomic information unified, very important}
\NormalTok{taxonomy\_table\_16S }\SpecialCharTok{\%\textless{}\textgreater{}\%}\NormalTok{ tidy\_taxonomy}
\end{Highlighting}
\end{Shaded}
The rownames of sample\_table in microtable object (i.e.~sample names) are used for selecting samples/groups in all the related operations in the package.
\textbf{Before creating microtable object, make sure that the rownames of sample information table are sample names}.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(sample\_info\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "data.frame"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{sample\_info\_16S[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{, ]}
\end{Highlighting}
\end{Shaded}
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 8\tabcolsep) * \real{0.12}}
>{\centering\arraybackslash}p{(\columnwidth - 8\tabcolsep) * \real{0.15}}
>{\centering\arraybackslash}p{(\columnwidth - 8\tabcolsep) * \real{0.11}}
>{\centering\arraybackslash}p{(\columnwidth - 8\tabcolsep) * \real{0.10}}
>{\centering\arraybackslash}p{(\columnwidth - 8\tabcolsep) * \real{0.25}}@{}}
\toprule
~ & SampleID & Group & Type & Saline \\
\midrule
\endhead
\textbf{S1} & S1 & IW & NE & Non-saline soil \\
\textbf{S2} & S2 & IW & NE & Non-saline soil \\
\textbf{S3} & S3 & IW & NE & Non-saline soil \\
\textbf{S4} & S4 & IW & NE & Non-saline soil \\
\textbf{S5} & S5 & IW & NE & Non-saline soil \\
\bottomrule
\end{longtable}
In this example, the environmental data is stored separately in env\_data\_16S.
The user can also directly integrate those data into the sample information table.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(env\_data\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "data.frame"
\end{verbatim}
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.12}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.15}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.16}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.15}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.19}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.22}}@{}}
\toprule
~ & Latitude & Longitude & Altitude & Temperature & Precipitation \\
\midrule
\endhead
\textbf{S1} & 52.96 & 122.6 & 432 & -4.2 & 445 \\
\textbf{S2} & 52.95 & 122.6 & 445 & -4.3 & 449 \\
\textbf{S3} & 52.95 & 122.6 & 430 & -4.3 & 449 \\
\textbf{S4} & 52.95 & 122.6 & 430 & -4.3 & 449 \\
\textbf{S5} & 52.95 & 122.6 & 429 & -4.3 & 449 \\
\bottomrule
\end{longtable}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(phylo\_tree\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "phylo"
\end{verbatim}
Then, we create an object of microtable class.
This operation is very similar to that of the package phyloseq~\citep{Mcmurdie_phyloseq_2013}, but in microeco it is more concise.
The otu\_table in the microtable class must be the species-sample format: rownames - OTU/ASV/other names; colnames - sample names.
\textbf{The colnames in otu\_table must have overlap with rownames of sample\_table}.
Otherwise, the following check can remove all the samples of otu\_table because there are no shared sample names between otu\_table and sample\_table.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# In R6 class, \textquotesingle{}$new\textquotesingle{} is the original method used to create a new object of class}
\CommentTok{\# If you only provide abundance table, the class can help you create a sample info table}
\NormalTok{dataset }\OtherTok{\textless{}{-}}\NormalTok{ microtable}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{otu\_table =}\NormalTok{ otu\_table\_16S)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## No sample_table provided, automatically use colnames in otu_table to create one ...
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{class}\NormalTok{(dataset)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "microtable" "R6"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# generally add the metadata}
\NormalTok{dataset }\OtherTok{\textless{}{-}}\NormalTok{ microtable}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{otu\_table =}\NormalTok{ otu\_table\_16S, }\AttributeTok{sample\_table =}\NormalTok{ sample\_info\_16S)}
\NormalTok{dataset}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 90 rows and 4 columns
## otu_table have 13628 rows and 90 columns
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# Let\textquotesingle{}s create a microtable object with more information}
\NormalTok{dataset }\OtherTok{\textless{}{-}}\NormalTok{ microtable}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{sample\_table =}\NormalTok{ sample\_info\_16S, }\AttributeTok{otu\_table =}\NormalTok{ otu\_table\_16S, }\AttributeTok{tax\_table =}\NormalTok{ taxonomy\_table\_16S, }\AttributeTok{phylo\_tree =}\NormalTok{ phylo\_tree\_16S)}
\NormalTok{dataset}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 90 rows and 4 columns
## otu_table have 13628 rows and 90 columns
## tax_table have 13628 rows and 7 columns
## phylo_tree have 14096 tips
\end{verbatim}
If the users want to know more details on microtable class,
please see the help document of the class using the following help command.
For example, see the phylo\_tree parameter of microtable\$new() for reading phylogenetic tree.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# search the class name, not the function name}
\NormalTok{?microtable}
\end{Highlighting}
\end{Shaded}
Then, we remove OTUs which are not assigned to the Kingdom ``k\_\_Archaea'' or ``k\_\_Bacteria''.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{dataset}\SpecialCharTok{$}\NormalTok{tax\_table }\SpecialCharTok{\%\textless{}\textgreater{}\%}\NormalTok{ base}\SpecialCharTok{::}\FunctionTok{subset}\NormalTok{(Kingdom }\SpecialCharTok{==} \StringTok{"k\_\_Archaea"} \SpecialCharTok{|}\NormalTok{ Kingdom }\SpecialCharTok{==} \StringTok{"k\_\_Bacteria"}\NormalTok{)}
\NormalTok{dataset}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 90 rows and 4 columns
## otu_table have 13628 rows and 90 columns
## tax_table have 13330 rows and 7 columns
## phylo_tree have 14096 tips
\end{verbatim}
We also remove OTUs with the taxonomic assignments ``mitochondria'' or ``chloroplast''.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# This will remove the lines containing the taxa word regardless of taxonomic ranks and ignoring word case in the tax\_table.}
\CommentTok{\# So if you want to filter some taxa not considered pollutions, please use subset like the previous operation to filter tax\_table.}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{filter\_pollution}\NormalTok{(}\AttributeTok{taxa =} \FunctionTok{c}\NormalTok{(}\StringTok{"mitochondria"}\NormalTok{, }\StringTok{"chloroplast"}\NormalTok{))}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## Total 34 taxa are removed from tax_table ...
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{print}\NormalTok{(dataset)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 90 rows and 4 columns
## otu_table have 13628 rows and 90 columns
## tax_table have 13296 rows and 7 columns
## phylo_tree have 14096 tips
\end{verbatim}
To make the OTU and sample information consistent across all files in the dataset object, we use function \textbf{tidy\_dataset} to trim the dataset.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{tidy\_dataset}\NormalTok{()}
\FunctionTok{print}\NormalTok{(dataset)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 90 rows and 4 columns
## otu_table have 13296 rows and 90 columns
## tax_table have 13296 rows and 7 columns
## phylo_tree have 13296 tips
\end{verbatim}
Then let's use sample\_sums() to check the sequence numbers in each sample.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{sample\_sums}\NormalTok{() }\SpecialCharTok{\%\textgreater{}\%}\NormalTok{ range}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] 10316 37087
\end{verbatim}
Sometimes, in order to reduce the effects of sequencing depth on the diversity measurements,
it is optional to perform resampling to make the sequence number equal for each sample.
The function rarefy\_samples can invoke the function tidy\_dataset automatically before and after the rarefying.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# As an example, use 10000 sequences in each sample}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{rarefy\_samples}\NormalTok{(}\AttributeTok{sample.size =} \DecValTok{10000}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## 530 OTUs were removed because they are no longer present in any sample after random subsampling ...
\end{verbatim}
\begin{verbatim}
## 530 taxa are removed from the otu_table, as the abundance is 0 ...
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{sample\_sums}\NormalTok{() }\SpecialCharTok{\%\textgreater{}\%}\NormalTok{ range}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] 10000 10000
\end{verbatim}
Then, let's calculate the taxa abundance at each taxonomic rank using cal\_abund().
This function \textbf{returns a list called taxa\_abund stored in the microtable object}.
This list contains several data frames with the abundance information at each taxonomic rank.
It's worth noting that the cal\_abund() function can be used to \textbf{solve more complicated cases with special parameters},
such as supporting both the relative and absolute abundance calculation and selecting the partial `taxonomic' columns.
These cases are shown in the file2meco package part (\url{https://chiliubio.github.io/microeco_tutorial/file2meco-package.html\#humann-metagenomic-results}) with a complex metagenomic dataset.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# use default parameters}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{cal\_abund}\NormalTok{()}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## The result is stored in object$taxa_abund ...
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# return dataset$taxa\_abund}
\FunctionTok{class}\NormalTok{(dataset}\SpecialCharTok{$}\NormalTok{taxa\_abund)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "list"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# show part of the relative abundance at Phylum level}
\NormalTok{dataset}\SpecialCharTok{$}\NormalTok{taxa\_abund}\SpecialCharTok{$}\NormalTok{Phylum[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{, }\DecValTok{1}\SpecialCharTok{:}\DecValTok{5}\NormalTok{]}
\end{Highlighting}
\end{Shaded}
\begin{longtable}[]{@{}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.44}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.11}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.11}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.11}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.11}}
>{\centering\arraybackslash}p{(\columnwidth - 10\tabcolsep) * \real{0.11}}@{}}
\toprule
~ & S1 & S2 & S3 & S4 & S5 \\
\midrule
\endhead
\textbf{k\_\_Bacteria\textbar p\_\_Proteobacteria} & 0.2008 & 0.1996 & 0.2151 & 0.261 & 0.1663 \\
\textbf{k\_\_Bacteria\textbar p\_\_Chloroflexi} & 0.1215 & 0.1937 & 0.1588 & 0.1471 & 0.3098 \\
\textbf{k\_\_Bacteria\textbar p\_\_Bacteroidetes} & 0.1816 & 0.0359 & 0.0267 & 0.0215 & 0.0266 \\
\textbf{k\_\_Bacteria\textbar p\_\_Acidobacteria} & 0.1215 & 0.2467 & 0.2532 & 0.262 & 0.2482 \\
\textbf{k\_\_Bacteria\textbar p\_\_Actinobacteria} & 0.1182 & 0.0861 & 0.0875 & 0.0954 & 0.0824 \\
\bottomrule
\end{longtable}
The function save\_abund() can be used to save the taxa abundance file to a local place easily.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{save\_abund}\NormalTok{(}\AttributeTok{dirpath =} \StringTok{"taxa\_abund"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
Then, let's calculate the alpha diversity.
The result is also stored in the microtable object automatically.
For the definition of each alpha diversity index, please see \url{http://scikit-bio.org/docs/latest/generated/skbio.diversity.alpha.html}.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# If you want to add Faith\textquotesingle{}s phylogenetic diversity, use PD = TRUE, this will be a little slow}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{cal\_alphadiv}\NormalTok{(}\AttributeTok{PD =} \ConstantTok{FALSE}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## The result is stored in object$alpha_diversity ...
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# return dataset$alpha\_diversity}
\FunctionTok{class}\NormalTok{(dataset}\SpecialCharTok{$}\NormalTok{alpha\_diversity)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "data.frame"
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# save dataset$alpha\_diversity to a directory}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{save\_alphadiv}\NormalTok{(}\AttributeTok{dirpath =} \StringTok{"alpha\_diversity"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
Let's go on to beta diversity with function cal\_betadiv().
We provide the four most frequently used indexes: Bray-Curtis, Jaccard, weighted UniFrac and unweighted UniFrac.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# If you do not want to calculate unifrac metrics, use unifrac = FALSE}
\CommentTok{\# require GUniFrac package installed}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{cal\_betadiv}\NormalTok{(}\AttributeTok{unifrac =} \ConstantTok{TRUE}\NormalTok{)}
\CommentTok{\# return dataset$beta\_diversity}
\FunctionTok{class}\NormalTok{(dataset}\SpecialCharTok{$}\NormalTok{beta\_diversity)}
\CommentTok{\# save dataset$beta\_diversity to a directory}
\NormalTok{dataset}\SpecialCharTok{$}\FunctionTok{save\_betadiv}\NormalTok{(}\AttributeTok{dirpath =} \StringTok{"beta\_diversity"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\hypertarget{other-examples}{%
\subsection{Other examples}\label{other-examples}}
From v0.7.0, microtable\$new has a new parameter auto\_tidy. If auto\_tidy = TRUE, the function automatically uses tidy\_dataset to make all files uniform.
Then, all other functions in microtable will also do this. However, if the user changes a file in the microtable object directly,
the class cannot recognize this modification, so the user should call the tidy\_dataset function to manually trim the microtable object.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{test }\OtherTok{\textless{}{-}}\NormalTok{ microtable}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{sample\_table =}\NormalTok{ sample\_info\_16S[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{40}\NormalTok{, ], }\AttributeTok{otu\_table =}\NormalTok{ otu\_table\_16S, }\AttributeTok{auto\_tidy =} \ConstantTok{FALSE}\NormalTok{)}
\NormalTok{test}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 40 rows and 4 columns
## otu_table have 13628 rows and 90 columns
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{test1 }\OtherTok{\textless{}{-}}\NormalTok{ microtable}\SpecialCharTok{$}\FunctionTok{new}\NormalTok{(}\AttributeTok{sample\_table =}\NormalTok{ sample\_info\_16S[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{40}\NormalTok{, ], }\AttributeTok{otu\_table =}\NormalTok{ otu\_table\_16S, }\AttributeTok{auto\_tidy =} \ConstantTok{TRUE}\NormalTok{)}
\NormalTok{test1}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 40 rows and 4 columns
## otu_table have 12747 rows and 40 columns
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{test1}\SpecialCharTok{$}\NormalTok{sample\_table }\SpecialCharTok{\%\textless{}\textgreater{}\%}\NormalTok{ .[}\DecValTok{1}\SpecialCharTok{:}\DecValTok{10}\NormalTok{, ]}
\NormalTok{test1}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 10 rows and 4 columns
## otu_table have 12747 rows and 40 columns
\end{verbatim}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{test1}\SpecialCharTok{$}\FunctionTok{tidy\_dataset}\NormalTok{()}
\NormalTok{test1}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## microtable class:
## sample_table have 10 rows and 4 columns