-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
1043 lines (1014 loc) · 77 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.3.450">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Rebecca">
<title>240410 Brown Bag</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
</style>
<script src="index_files/libs/clipboard/clipboard.min.js"></script>
<script src="index_files/libs/quarto-html/quarto.js"></script>
<script src="index_files/libs/quarto-html/popper.min.js"></script>
<script src="index_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="index_files/libs/quarto-html/anchor.min.js"></script>
<link href="index_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="index_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="index_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="index_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="index_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">240410 Brown Bag</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Rebecca </p>
</div>
</div>
</div>
</header>
<section id="commit---pull---push" class="level1">
<h1>Commit - Pull - Push</h1>
<section id="connecting-github-to-rstudio-for-reproducible-research" class="level2">
<h2 class="anchored" data-anchor-id="connecting-github-to-rstudio-for-reproducible-research">Connecting GitHub to RStudio for Reproducible Research</h2>
<p><strong>Note:</strong> Brown Bag attendees will learn more from my presentation if they 1) bring their laptop or follow along on Zoom, 2) have <a href="https://posit.co/download/rstudio-desktop/">RStudio</a> already installed on their machine, and 3) make sure that Git is installed on their machine and that the path to the Git executable is added to their system’s PATH environment variable.</p>
<div class="callout callout-style-default callout-important callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Important
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>OBJECTIVES:</strong></p>
<ol type="1">
<li><p>Participants will understand that revision control supports reproducible and replicable research</p></li>
<li><p>Participants will become familiar with an RStudio &lt;-&gt; GitHub workflow for revision control</p></li>
<li><p>(if time) Participants will participate in a code conflict resolution exercise</p></li>
</ol>
</div>
</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>Subnote on YAML:</strong></p>
<p>YAML is the document’s metadata, which sets guidelines on how you want the output of your document to look. It is located at the top of your file, delineated by three dashes (<code>---</code>) at the top and at the bottom of it. It can be used to specify:</p>
<ul>
<li><p>Characteristics of your document such as title, author, date of creation.</p></li>
<li><p>Arguments to pass on to the building process to control the format of the output.</p></li>
<li><p>Add additional information such as the bibliography file (and formatting of the references)</p></li>
<li><p>Specific parameters for your report (e.g., just use a subset of the data).</p></li>
</ul>
</div>
</div>
<hr>
<blockquote class="blockquote">
<p>The information in this presentation was shamelessly taken from the February 2024 materials from the Arctic Data Center training: <a href="https://learning.nceas.ucsb.edu/2024-02-arctic/">Reproducible Approaches to Arctic Research Using R</a></p>
<ul>
<li>Jeanette Clark, Angie Garcia, Matthew B. Jones, Justin Kadi, Maggie Klope, Camila Vargas Poulsen (2024), Reproducible Approaches to Arctic Research Using R. Arctic Data Center. URL <a href="https://learning.nceas.ucsb.edu/2024-02-arctic" class="uri">https://learning.nceas.ucsb.edu/2024-02-arctic</a>.</li>
</ul>
<p>You may also find these materials helpful for your work:</p>
<ul>
<li><p>Angie Garcia, Matthew B. Jones, Justin Kadi, Maggie Klope, Camila Vargas Poulsen (2024), Fundamentals in Data Management for Qualitative and Quantitative Arctic Research. Arctic Data Center. URL <a href="https://learning.nceas.ucsb.edu/2024-01-arctic" class="uri">https://learning.nceas.ucsb.edu/2024-01-arctic</a>.</p></li>
<li><p>S. Jeanette Clark, Matthew B. Jones, Samantha Csik, Carmen Galaz García, Bryce Mecum, Natasha Haycock-Chavez, Daphne Virlar-Knight, Juliet Cohen, Anna Liljedahl. 2023. Scalable and Computationally Reproducible Approaches to Arctic Research. Arctic Data Center. <a href="https://doi.org/10.18739/A2QF8JM2V">doi:10.18739/A2QF8JM2V</a></p></li>
</ul>
</blockquote>
<hr>
</section>
</section>
<section id="lets-get-started-what-is-reproducible-research" class="level1">
<h1>Let’s get started: What is Reproducible Research?</h1>
<p>Reproducible analysis allows you to automate how the figures and the statistics in your analysis are generated. This process also helps your collaborators, your readers and your future self to follow the code trail that leads back to the original data, increasing the transparency of your science.</p>
<p>Literate analysis helps reduce the mistakes from copying and pasting across software, keeps results and models in sync, and allows you to provide interested readers with more information about the different approaches and analyses you tried before coming up with the final results (British Ecological Society (<a href="https://learning.nceas.ucsb.edu/2024-02-arctic/session_03.html#ref-britecolsoc2017">2017</a>)).</p>
<p>Every file in the scientific process changes. Manuscripts are edited. Figures get revised. Code gets fixed when bugs are discovered. Sometimes those fixes lead to even more bugs, leading to more changes in the code base. Data files get combined together. Sometimes those same files are split and combined again. In just one research project, we can expect thousands of changes to occur.</p>
<p>These changes are important to track, and yet, we often use simplistic file names to do so. Many of us have experienced renaming a document or script multiple times with the disingenuous addition of “final” to the file name (like the comic above demonstrates).</p>
<p>You might think there is a better way, and you’d be right: <strong>version control</strong>. Version control provides an organized and transparent way to track changes in code and additional files. This practice was designed for software development, but is easily applicable to scientific programming.</p>
<p>There are many benefits to using a version control software including:</p>
<ul>
<li><p><strong>Maintain a history</strong> of your research project’s development while keeping your workspace clean</p></li>
<li><p><strong>Facilitate collaboration</strong> and transparency when working on teams</p></li>
<li><p><strong>Explore bugs or new features</strong> without disrupting your team members’ work</p></li>
<li><p>and more!</p></li>
</ul>
<p>The version control system we’ll be diving into is Git, the most widely used modern version control system in the world.</p>
<section id="with-git-we-can-enhance-our-workflow" class="level4">
<h4 class="anchored" data-anchor-id="with-git-we-can-enhance-our-workflow">With Git we can enhance our workflow:</h4>
<ul>
<li><p><strong>Eliminate</strong> the need for <strong>cryptic filenames</strong> and comments to track our work.</p></li>
<li><p>Provide <strong>detailed descriptions of our changes</strong> through commits, making it easier to understand the reasons behind code modifications.</p></li>
<li><p>Work on multiple <strong>branches</strong> simultaneously, allowing for parallel development, and optionally merge them together.</p></li>
<li><p>Use commits to <strong>access and even execute older versions</strong> of our code.</p></li>
<li><p><strong>Assign meaningful tags</strong> to specific versions of our code.</p></li>
<li><p>Additionally, Git offers a powerful distributed feature. <strong>Multiple individuals can work on the same analysis concurrently</strong> on their own computers, with the ability to merge everyone’s changes together.</p></li>
</ul>
<hr>
</section>
</section>
<section id="but-before-we-dive-into-git-and-github" class="level1">
<h1>But Before we dive into Git and GitHub…</h1>
<section id="in-summary-reproducible-research-is" class="level2">
<h2 class="anchored" data-anchor-id="in-summary-reproducible-research-is">In Summary, Reproducible Research is:</h2>
<p>Working in a reproducible manner:</p>
<ul>
<li><p>Increases research efficiency, accelerating the pace of your research and collaborations.</p></li>
<li><p>Provides transparency by capturing and communicating scientific workflows.</p></li>
<li><p>Enables research to stand on the shoulders of giants (build on work that came before).</p></li>
<li><p>Allows credit for secondary usage and supports easy attribution.</p></li>
<li><p>Increases trust in science.</p></li>
</ul>
<p>To enable others to fully interpret, reproduce or build upon our research, we need to provide more comprehensive information than is typically included in a figure or publication. The <strong>methods sections of papers are typically inadequate to fully reproduce the work described in the paper</strong>.</p>
<p><strong>Computational reproducibility</strong> is the ability to document data, analyses, and models sufficiently for other researchers to be able to understand and ideally re-execute the computations that led to scientific results and conclusions.</p>
<p>Practically speaking, reproducibility includes:</p>
<ul>
<li><p>Preserving the data</p></li>
<li><p>Preserving the software workflow</p></li>
<li><p>Documenting what you did</p></li>
<li><p>Describing how to interpret it all</p></li>
</ul>
<p>A recent study of publicly-available datasets in the Harvard Database repository containing R files found that only 26% of R files ran without error in the initial execution. 44% were able to be run after code cleaning, showing the importance of good programming practice (<a href="https://learning.nceas.ucsb.edu/2024-02-arctic/session_20.html#ref-trisovic2022">Trisovic et al. 2022</a>). The figure below from Trisovic et al. shows a <em>sankey diagram</em> of how code cleaning was able to fix common errors.</p>
<p><img src="240410_brown_bag_files/code-reproducibility-trisovic.png" class="img-fluid" width="880"></p>
</section>
<section id="computational-provenance-and-workflows" class="level2">
<h2 class="anchored" data-anchor-id="computational-provenance-and-workflows"><strong>Computational Provenance and Workflows</strong></h2>
<p>Computational provenance refers to the origin and processing history of data including:</p>
<ul>
<li><p>Input data</p></li>
<li><p>Workflow/scripts</p></li>
<li><p>Output data</p></li>
<li><p>Figures</p></li>
<li><p>Methods, dataflow, and dependencies</p></li>
</ul>
<p>When we put these all together with formal documentation, we create a <strong>computational workflow</strong> that captures all of the steps from initial data cleaning and integration, through analysis, modeling, and visualization. In other words, <strong>computational provenance is a formalized description of a workflow from the origin of the data to its final outcome</strong>.</p>
<p>Here’s an example of a computational workflow from Mark Carls: <a href="https://search.dataone.org/view/urn%3Auuid%3A3249ada0-afe3-4dd6-875e-0f7928a4c171">Mark Carls. Analysis of hydrocarbons following the Exxon Valdez oil spill, Gulf of Alaska, 1989 - 2014. Gulf of Alaska Data Portal. urn:uuid:3249ada0-afe3-4dd6-875e-0f7928a4c171.</a>, that represents a three step workflow comprising four source data files and two output visualizations.</p>
<p><img src="240410_brown_bag_files/comp-workflow-1.png" class="img-fluid" width="920"></p>
</section>
</section>
<section id="now-lets-dive-into-git-and-github" class="level1">
<h1>NOW Let’s dive into Git and GitHub:</h1>
<hr>
<div class="callout callout-style-default callout-important callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Check if Git is Installed
</div>
</div>
<div class="callout-body-container callout-body">
<p>First, ensure that Git is indeed installed on your computer. You can do this by trying to run <code>git --version</code> in your terminal or command prompt. If Git is installed, this command will return the version of Git you have installed.</p>
<section id="install-git" class="level2">
<h2 class="anchored" data-anchor-id="install-git">Install Git</h2>
<p>If Git is not installed, you’ll need to download and install it. You can download Git from <a href="https://git-scm.com">git-scm.com</a>. During the installation process, there is an option to add Git to your PATH. Make sure this option is selected.</p>
<p>Are you getting any errors? <a href="240410_brown_bag.html">Setting the Path to Git Manually</a></p>
</section>
</div>
</div>
<hr>
<section id="what-exactly-are-git-and-github" class="level3">
<h3 class="anchored" data-anchor-id="what-exactly-are-git-and-github"><strong>What <em>exactly</em> are Git and GitHub?</strong></h3>
<p><img src="240410_brown_bag_files/git-intro.png" class="img-fluid" width="417"></p>
<section id="git" class="level4">
<h4 class="anchored" data-anchor-id="git">Git:</h4>
<ul>
<li><p>an open-source distributed <strong>version control</strong> software</p></li>
<li><p>designed to manage the versioning and tracking of source code files and project history</p></li>
<li><p><strong>operates locally</strong> on your computer, allowing you to create repositories, and track changes</p></li>
<li><p>provides features such as committing changes, branching and merging code, reverting to previous versions, and managing project history</p></li>
<li><p>works directly with the files on your computer and does not require a network connection to perform most operations</p></li>
<li><p>primarily used through the command-line interface (CLI, e.g. Terminal), but also has various GUI tools available (e.g. RStudio IDE)</p>
<p><img src="240410_brown_bag_files/github-intro.png" class="img-fluid" width="428"></p></li>
</ul>
</section>
<section id="github" class="level4">
<h4 class="anchored" data-anchor-id="github">GitHub:</h4>
<ul>
<li><p><strong>online platform</strong> and service built around Git</p></li>
<li><p>provides a <strong>centralized hosting platform for Git repositories</strong></p></li>
<li><p>allows users to store, manage, and collaborate on their Git repositories in the cloud</p></li>
<li><p>offers additional features on top of Git, such as a web-based interface, issue tracking, project management tools, pull requests, code review, and collaboration features</p></li>
<li><p>enables easy sharing of code with others, facilitating collaboration and contribution to open source projects</p></li>
<li><p>provides a social aspect, allowing users to follow projects, star repositories, and discover new code</p></li>
</ul>
</section>
</section>
<section id="understanding-how-local-working-files-git-and-github-all-work-together" class="level3">
<h3 class="anchored" data-anchor-id="understanding-how-local-working-files-git-and-github-all-work-together"><strong>Understanding how local working files, Git, and GitHub all work together</strong></h3>
<p>It can be a bit daunting to understand all the moving parts of the Git / GitHub life cycle (i.e. how file changes are tracked locally within repositories, then stored for safe-keeping and collaboration on remote repositories, then brought back down to a local machine(s) for continued development). It gets easier with practice, but we’ll explain (first in words, then with an illustration) at a high-level how things work:</p>
<section id="what-is-the-difference-between-a-normal-folder-vs.-a-git-repository" class="level4">
<h4 class="anchored" data-anchor-id="what-is-the-difference-between-a-normal-folder-vs.-a-git-repository">What is the difference between a “normal” folder vs. a Git repository</h4>
<p>Whether you’re a Mac or a PC user, you’ll likely have created a folder at some point in time for organizing files. Let’s pretend that we create a folder, called <code>myFolder/</code>, and add two files: <code>myData.csv</code> and <code>myAnalysis.R</code>. The contents of this folder are not currently version controlled – meaning, for example, that if we make some changes to <code>myAnalysis.R</code> that don’t quite work out, we have no way of accessing or reverting back to a previous version of <code>myAnalysis.R</code> (without remembering/rewriting things, of course).</p>
<p>Git allows you to turn any “normal” folder, like <code>myFolder/</code>, into a <strong>Git repository</strong> – you’ll often see/hear this referenced as “initializing a Git repository”. When you initialize a folder on your local computer as a Git repository, a hidden <code>.git/</code> folder is created <em>within</em> that folder (e.g. <code>myFolder/.git/</code>) – this <code>.git/</code> folder <em>is</em> the <strong>Git repository</strong>. As you use Git commands to capture versions or “snapshots” of your work, those versions (and their associated metadata) get stored within the <code>.git/</code> folder. This allows you to access and/or recover any previous versions of your work. If you delete <code>.git/</code>, you delete your project’s history.</p>
<p>Here is our example folder / Git repository represented visually:</p>
<p><img src="240410_brown_bag_files/git-repo.png" class="img-fluid"></p>
</section>
<section id="my-versioned-work-is-on-my-local-computer-but-i-want-to-send-it-to-github.-how" class="level4">
<h4 class="anchored" data-anchor-id="my-versioned-work-is-on-my-local-computer-but-i-want-to-send-it-to-github.-how">My versioned work is on my local computer, but I want to send it to GitHub. How?</h4>
<p>The last step is synchronizing the changes made to our local repository with a remote repository (oftentimes, this remote repository is stored on GitHub). The <code>git push</code> command is used to send local commits up to a remote repository. The <code>git pull</code> command is used to fetch changes from a remote repository and merge them into the local repository – <strong>pull</strong>ing will become a regular part of your workflow when collaborating with others, or even when working alone but on different machines (e.g. a laptop at home and a desktop at the office).</p>
<p>The processes described in the above sections (i.e. making changes to local working files, recording “snapshots” of them to create a versioned history of changes in a local Git repository, and sending those versions from our local Git repository to a remote repository (which is oftentimes on GitHub)) are illustrated using islands, buildings, bunnies, and packages in the artwork below:</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="240410_brown_bag_files/allison-horst-git-workflow%20(1).png" class="img-fluid figure-img"></p>
<figcaption class="figure-caption">Artwork by Allison Horst</figcaption>
</figure>
</div>
<blockquote class="blockquote">
<p><strong><br>
What should I write in my commit message?</strong></p>
<p>Writing effective Git commit messages is essential for creating a meaningful and helpful version history in your repository. It is crucial to avoid skipping commit messages or resorting to generic phrases like “Updates.” When it comes to following best practices, there are several guidelines to enhance the readability and maintainability of the codebase.</p>
<p>Here are some guidelines for writing effective Git commit messages:</p>
<ol type="1">
<li><p><strong>Be descriptive and concise</strong>: Provide a clear and concise summary of the changes made in the commit. Aim to convey the purpose and impact of the commit in a few words.</p></li>
<li><p><strong>Use imperative tense</strong>: Write commit messages in the imperative tense, as if giving a command. For example, use “Add feature” instead of “Added feature” or “Adding feature.” This convention aligns with other Git commands and makes the messages more actionable.</p></li>
<li><p><strong>Separate subject and body</strong>: Start with a subject line, followed by a blank line, and then provide a more detailed explanation in the body if necessary. The subject line should be a short, one-line summary, while the body can provide additional context, motivation, or details about the changes.</p></li>
<li><p><strong>Limit the subject line length</strong>: Keep the subject line within 50 characters or less. This ensures that the commit messages are easily scannable and fit well in tools like Git logs.</p></li>
<li><p><strong>Capitalize and punctuate properly</strong>: Begin the subject line with a capital letter and use proper punctuation. This adds clarity and consistency to the commit messages.</p></li>
<li><p><strong>Focus on the “what” and “why”</strong>: Explain what changes were made and why they were made. Understanding the motivation behind a commit helps future researchers and collaborators (including you!) comprehend its purpose.</p></li>
<li><p><strong>Use present tense for subject, past tense for body</strong>: Write the subject line in present tense as it represents the current state of the codebase. Use past tense in the body to describe what has been done.</p></li>
<li><p><strong>Reference relevant issues</strong>: If the commit is related to a specific issue or task, include a reference to it. For example, you can mention the issue number or use keywords like “Fixes,” “Closes,” or “Resolves” followed by the issue number.</p></li>
</ol>
</blockquote>
<hr>
</section>
</section>
</section>
<section id="enough-of-the-explanations-set-up-global-options-in-git" class="level1">
<h1>Enough of the explanations: Set Up Global Options in Git</h1>
<p>Before using Git, you need to tell it who you are, also known as setting the global options. To do this, we will be setting the global options in the Terminal.</p>
<section id="whats-the-terminal" class="level2">
<h2 class="anchored" data-anchor-id="whats-the-terminal">What’s the Terminal?</h2>
<p>Technically, the Terminal is an interface for the shell, a computer program. To put that simply, we use the Terminal to tell a computer what to do. This is different from the Console in RStudio, which interprets R code and returns a value.</p>
</section>
<section id="opening-a-terminal-in-rstudio" class="level2">
<h2 class="anchored" data-anchor-id="opening-a-terminal-in-rstudio">Opening a Terminal in RStudio</h2>
<p>To get started, let’s open a new Terminal window in RStudio. Do this by clicking <code>Tools > Terminal > New Terminal</code>.</p>
<p>A Terminal tab should now be open where your Console usually is.</p>
</section>
<section id="dipping-your-toes-in-the-terminal" class="level2">
<h2 class="anchored" data-anchor-id="dipping-your-toes-in-the-terminal">Dipping Your Toes in the Terminal</h2>
<p>Most of our Git operations will be done in RStudio, but there are some situations where you must work in the Terminal and use the command line. It may be daunting to code in the Terminal, but as your comfort increases over time, you might find you prefer it. Either way, it’s beneficial to learn enough of the command line to feel comfortable in the Terminal.</p>
</section>
<section id="configuring-git" class="level2">
<h2 class="anchored" data-anchor-id="configuring-git">Configuring Git</h2>
<p>Let’s start by adding your user name to the global options. Type the following into the command prompt, with your exact GitHub username, and press enter:</p>
<pre><code>git config --global user.name "my_user_name"</code></pre>
<p>Note that if the code ran successfully, it will look like nothing happened. We will check at the end to make sure it worked.</p>
<p>Next, enter the following line, with the email address you used when you created your account on <a href="https://www.github.com">github.com</a>:</p>
<pre><code>git config --global user.email "my_email@example.com"</code></pre>
</section>
</section>
<section id="setting-up-git-credentials" class="level1">
<h1>Setting Up Git Credentials</h1>
<p>Next, we will set our credentials to not time out for a very long time. This is related to how our server operating system handles credentials - not doing this will make your Personal Access Token (PAT, which we will set up in the next section) expire immediately on the system, even though it is actually valid for at least a month.</p>
<section id="when-setting-up-git-and-github-on-your-personal-computer" class="level2">
<h2 class="anchored" data-anchor-id="when-setting-up-git-and-github-on-your-personal-computer">When Setting up Git and GitHub on Your Personal Computer</h2>
<p>You will not need to run the <code>git config</code> line below to set the cache. This is a specific configuration for the RStudio Server we are working on.</p>
<pre><code>git config --global credential.helper 'cache --timeout=10000000'</code></pre>
<p>Next, we will set the default branch name to <code>main</code> for any new repositories that are created moving forward. Why are we doing this? Previously, the default branch name was <code>master</code> and this racist terminology for Git branches motivates us to update our default branch to <code>main</code> instead.</p>
<pre><code>git config --global init.defaultBranch main</code></pre>
<p>Finally, check to make sure everything looks correct by entering this command, which will return the global options you have set.</p>
<pre><code>git config --global --list</code></pre>
</section>
<section id="github-authentication" class="level2">
<h2 class="anchored" data-anchor-id="github-authentication">GitHub Authentication</h2>
<p>GitHub recently deprecated password authentication for accessing repositories, so we need to set up a secure way to authenticate.</p>
<p>The book <a href="https://happygitwithr.com/"><em>Happy Git and GitHub for the useR</em></a> has a wealth of information related to working with Git in R, and these instructions are based off of <a href="https://happygitwithr.com/https-pat.html">Chapter 9: Personal Access Token for HTTPS</a>.</p>
<p>We will be using a <strong>Personal Access Token (PAT)</strong></p>
<section id="setting-up-your-pat" class="level3">
<h3 class="anchored" data-anchor-id="setting-up-your-pat">Setting Up Your PAT</h3>
<ul>
<li>Run <code>usethis::create_github_token()</code> in the Console.</li>
<li>A new browser window should open up to GitHub, showing all the scopes options. Using <code>create_github_token()</code> automatically pre-selects some recommended scopes. Scroll to the bottom and click “Generate Token”.</li>
<li>Copy the generated token.</li>
<li>Back in RStudio, run <code>gitcreds::gitcreds_set()</code> in the Console.</li>
<li>Paste your PAT when prompted.</li>
<li>Last thing, run <code>usethis::git_sitrep()</code> in the Console to check your Git configuration and confirm that you’ve successfully stored your PAT.</li>
</ul>
<p>Congrats! Now you’ve set up your authentication, and you should be able to work with GitHub in RStudio.</p>
<blockquote class="blockquote">
<p><strong>Note:</strong> For better security and long term use, we recommend taking the extra steps to set up SSH keys (check out Chapter 10 Set up Keys for SSH in the book <em>Happy Git and GitHub for the useR</em>).</p>
</blockquote>
<div class="callout callout-style-default callout-warning callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Warning
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>Git configuration to suppress warning messages</strong></p>
<p>Git version 2.27 includes a new feature that allows users to specify the default method for integrating changes from a remote repository into a local repository, without receiving a warning (this warning is informative, but can get annoying). To suppress this warning <em>for this repository only</em> we need to configure Git by running this line of code in the Terminal:</p>
<pre><code>git config pull.rebase false</code></pre>
<p><code>pull.rebase false</code> is a default strategy for pulling where Git will first try to auto-merge the files. If auto-merging is not possible, it will indicate a merge conflict.</p>
</div>
</div>
<hr>
</section>
</section>
</section>
<section id="collaborating-with-trusted-colleagues-without-conflicts" class="level1">
<h1><strong>Collaborating with trusted colleagues without conflicts</strong></h1>
<p>We start our collaboration by giving a trusted colleague access to our repository on GitHub. In this example, we define the <strong>Owner as the individual who owns the repository</strong>, and the <strong>Collaborator as the person whom the Owner chooses to give permission to make changes to their repository</strong>.</p>
<p>The Collaborator will make changes to the repository and then <code>push</code> those changes to the shared repository on GitHub. The Owner will then use <code>pull</code> to retrieve the changes without encountering any conflicts. <em>This is the most ideal workflow.</em></p>
<p>The instructors will demonstrate this process in the next section.</p>
<section id="step-0-owner-adds-a-collaborator-to-their-repository-on-github" class="level3">
<h3 class="anchored" data-anchor-id="step-0-owner-adds-a-collaborator-to-their-repository-on-github"><strong>Step 0: Owner adds a Collaborator to their repository on GitHub</strong></h3>
<p>The Owner must change the settings of the remote repository and give the Collaborator access to the repository by inviting them as a collaborator. Once the Collaborator accepts the owner’s invitation, they will have push access to the repository – meaning they can contribute their own changes/commits to the Owner’s repository.</p>
<p>To do this, the owner will navigate to their remote repository on GitHub, then choose <strong>Settings</strong> > <strong>Collaborators</strong> > <strong>Add people</strong>, to send an email invitation. The invitation will show as “Pending” until accepted.</p>
</section>
<section id="step-1-collaborator-clones-the-remote-repository" class="level3">
<h3 class="anchored" data-anchor-id="step-1-collaborator-clones-the-remote-repository"><strong>Step 1: Collaborator clones the remote repository</strong></h3>
<p>In order to contribute, the Collaborator must <strong>clone</strong> the repository from the <strong>Owner’s</strong> GitHub account (<em>Note: as a Collaborator, you won’t see the repository appear under your profile’s Repositories page</em>). To do this, the Collaborator should navigate to the Owner’s repository on GitHub, then copy the clone URL. In RStudio, the Collaborator will create a new project from version control by pasting this clone URL into the appropriate dialog box.</p>
</section>
</section>
<section id="clone-your-repository-and-use-git-locally-in-rstudio" class="level1">
<h1><strong><code>clone</code> your repository and use Git locally in RStudio</strong></h1>
<p>Let’s bring a copy of this remote repository down to our local computer (aka <strong>clone</strong> this repository) so that we can work comfortably in RStudio.</p>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>An important distinction</strong></p>
<p>We refer to the <strong>remote copy</strong> of the repository that is on GitHub as the <strong>origin repository</strong> (the one that we cloned from), and the copy on our local computer as the <strong>local repository</strong>.</p>
</div>
</div>
<p>Start by clicking the green <strong>Code</strong> button (top right of your file listing) and copying the URL to your clipboard (this URL represents the repository location):</p>
<p><img src="240410_brown_bag_files/github-test-clone-url.png" class="img-fluid" width="646"></p>
<blockquote class="blockquote">
<p><strong>Setup</strong></p>
<ul>
<li><p>Click <strong>File</strong> > <strong>New Project</strong></p></li>
<li><p>Select <strong>Version Control</strong> and paste the remote repository URL (which should be copied to your clipboard) in the <strong>Repository URL</strong> field</p></li>
<li><p>Press <strong>Tab</strong>, which will auto-fill the <strong>Project directory name</strong> field with the same name as that of your remote repo – while you can name the local copy of the repository anything, it’s typical (and highly recommended) to use the same name as the GitHub repository to maintain the correspondence</p>
<p><img src="240410_brown_bag_files/rstudio-clone-repo-sam.png" class="img-fluid"></p></li>
</ul>
</blockquote>
<p>Once you click <strong>Create Project</strong>, a new RStudio window will open with all of the files from the remote repository copied locally. Depending on how your version of RStudio is configured, the location and size of the panes may differ, but they should all be present – you should see a <strong>Git</strong> tab, as well as the <strong>Files</strong> tab, where you can view all of the files copied from the remote repo to this local repo.</p>
<blockquote class="blockquote">
<p><strong>Last thing, some Git configuration to suppress warning messages</strong></p>
<p>Git version 2.27 includes a new feature that allows users to specify the default method for integrating changes from a remote repository into a local repository, without receiving a warning (this warning is informative, but can get annoying). To suppress this warning <em>for this repository only</em> we need to configure Git by running this line of code in the Terminal:</p>
<pre><code>git config pull.rebase false</code></pre>
<p><code>pull.rebase false</code> is a default strategy for pulling where Git will first try to auto-merge the files. If auto-merging is not possible, it will indicate a merge conflict.</p>
<p><strong><code>.gitignore</code> files allow you to specify which files/folders you <em>don’t</em> want Git to track</strong></p>
<p>A <code>.gitignore</code> file is automatically created in the root directory of your project when you initialize it as a Git repository. You’ll notice that there are already some R / R Project-specific files that have been added by default.</p>
<p><strong>Why is this useful?</strong> For many reasons, but possibly the greatest use-case is excluding large files (GitHub blocks files larger than 100 MB) or files with sensitive information (e.g. keys, tokens) that you don’t want to accidentally push to GitHub.</p>
<p><strong>How do I do this?</strong> Let’s say I create a file with sensitive information that I don’t want to push to GitHub. I can add a line to my <code>.gitignore</code> file:</p>
<pre><code># added by default when I initialized my RProj as a Git Repository
.Rproj.user
.Rhistory
.Rdata
.httr-oauth
.DS_Store
.quarto

# add file so that it doesn't get pushed to the remote repo (on GitHub)
contains_sensitive_info.R</code></pre>
<p>If this file is currently untracked by Git, it should appear in my <strong>Git</strong> tab. Once I add it to the <code>.gitignore</code> <em>and</em> save the modified <code>.gitignore</code> file, you should see <code>contains_sensitive_info.R</code> disappear from the <strong>Git</strong> tab, and a modified <code>.gitignore</code> (denoted by a blue <strong>M</strong>) appear. Stage/commit/push this modified <code>.gitignore</code> file.</p>
</blockquote>
<div class="callout callout-style-default callout-tip callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Tip
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>Why do I need to add and commit files <em>before</em> pulling?</strong></p>
<blockquote class="blockquote">
<p>Remember, <code>git pull</code> is a combination of <code>git fetch</code>ing remote changes to your local repo and <code>git merge</code>ing those changes from your local repo into your local working file(s).</p>
<p>The <strong>merge</strong> part of <code>git pull</code> will fail if you have uncommitted changes in your local working file(s) to avoid any potential overwriting of your own changes. Because of this, you should always add/commit <em>then</em> pull, <em>and finally</em> push.</p>
</blockquote>
</div>
</div>
<section id="a-note-on-advanced-collaboration-techniques" class="level2">
<h2 class="anchored" data-anchor-id="a-note-on-advanced-collaboration-techniques"><strong>A Note on Advanced Collaboration Techniques</strong></h2>
<p>There are many Git and GitHub collaboration techniques, some more advanced than others. We won’t be covering advanced strategies in this course. But here is a table for your reference on a few popular Git collaboration workflow strategies and tools.</p>
<table class="table">
<colgroup>
<col style="width: 8%">
<col style="width: 23%">
<col style="width: 37%">
<col style="width: 29%">
</colgroup>
<thead>
<tr class="header">
<th>Collaboration Technique</th>
<th>Benefits</th>
<th>When to Use</th>
<th>When Not to Use</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Branch Management Strategies</td>
<td>1. Enables parallel development and experimentation<br>
2. Facilitates isolation of features or bug fixes<br>
3. Provides flexibility and control over project workflows</td>
<td>When working on larger projects with multiple features or bug fixes simultaneously.<br>
When you want to maintain a stable main branch while developing new features or resolving issues on separate branches.<br>
When collaborating with teammates on different aspects of a project and later integrating their changes.</td>
<td>When working on small projects with a single developer or limited codebase.<br>
When the project scope is simple and doesn’t require extensive branch management.<br>
When there is no need to isolate features or bug fixes.</td>
</tr>
<tr class="even">
<td>Code Review Practices</td>
<td>1. Enhances code quality and correctness through feedback<br>
2. Promotes knowledge sharing and learning within the team<br>
3. Helps identify bugs, improve performance, and ensure adherence to coding standards</td>
<td>When collaborating on a codebase with team members to ensure code quality and maintain best practices.<br>
When you want to receive feedback and suggestions on your code to improve its readability, efficiency, or functionality.<br>
When working on critical or complex code that requires an extra layer of scrutiny before merging it into the main branch.</td>
<td>When working on personal projects or small codebases with no collaboration involved.<br>
When time constraints or project size make it impractical to conduct code reviews.<br>
When the codebase is less critical or has low complexity.</td>
</tr>
<tr class="odd">
<td>Forking</td>
<td>1. Enables independent experimentation and development<br>
2. Provides a way to contribute to a project without direct access<br>
3. Allows for creating separate, standalone copies of a repository</td>
<td>When you want to contribute to a project without having direct write access to the original repository.<br>
When you want to work on an independent variation or extension of an existing project.<br>
When experimenting with changes or modifications to a project while keeping the original repository intact.</td>
<td>When collaborating on a project with direct write access to the original repository.<br>
When the project does not allow external contributions or forking.<br>
When the project size or complexity doesn’t justify the need for independent variations.</td>
</tr>
<tr class="even">
<td>Pull Requests</td>
<td>1. Facilitates code review and discussion<br>
2. Allows for collaboration and feedback from team members<br>
3. Enables better organization and tracking of proposed changes</td>
<td>When working on a shared repository with a team and wanting to contribute changes in a controlled and collaborative manner.<br>
When you want to propose changes to a project managed by others and seek review and approval before merging them into the main codebase.</td>
<td>When working on personal projects or individual coding tasks without the need for collaboration.<br>
When immediate changes or fixes are required without review processes.<br>
When working on projects with a small team or single developer with direct write access to the repository.</td>
</tr>
</tbody>
</table>
<p>The “When Not to Use” column provides insights into situations where it <em>may</em> be less appropriate / unnecessary to use each collaboration technique, helping you make informed decisions based on the specific context and requirements of your project.</p>
<p>These techniques provide different benefits and are used in various collaboration scenarios, depending on the project’s needs and team dynamics.</p>
</section>
<section id="merge-conflicts" class="level2">
<h2 class="anchored" data-anchor-id="merge-conflicts"><strong>Merge conflicts</strong></h2>
<p><strong>Merge conflicts</strong> occur when both collaborators make conflicting changes to the same file. Resolving merge conflicts involves identifying the root of the problem and restoring the project to a normal state. Good communication, discussing file sections to work on, and avoiding overlaps can help prevent merge conflicts. However, if conflicts do arise, Git warns about potential issues and ensures that changes from different collaborators based on the same file version are not overwritten. To resolve conflicts, you need to explicitly specify whose changes should be used for each conflicting line in the file.</p>
<p>In this image, we see collaborators <code>mbjones</code> and <code>metamattj</code> have both made changes to the same line in the same <code>README.md</code> file. This is causing a merge conflict because Git doesn’t know whose changes came first. To resolve it, we need to tell Git whose changes to keep for that line, and whose changes to discard.</p>
<p><img src="240410_brown_bag_files/git-conflict-00-lines-changed.png" class="img-fluid"></p>
<section id="common-ways-to-resolve-a-merge-conflict" class="level3">
<h3 class="anchored" data-anchor-id="common-ways-to-resolve-a-merge-conflict"><strong>Common ways to resolve a merge conflict</strong></h3>
<p><strong>1. Abort, abort, abort…</strong></p>
<p>Sometimes you just made a mistake. When you get a merge conflict, the repository is placed in a “Merging” state until you resolve it. There’s a Terminal command to abort doing the merge altogether:</p>
<pre><code>git merge --abort</code></pre>
<p>Of course, after doing that you still haven’t synced with your Collaborator’s changes, so things are still unresolved. But at least your repository is now usable on your local machine.</p>
<p><strong>2. Checkout</strong></p>
<p>The simplest way to resolve a conflict, given that you know whose version of the file you want to keep, is to use the command line to tell Git to use either <strong>your</strong> changes (the person doing the <code>merge</code>), or <strong>their</strong> changes (the Collaborator).</p>
<ul>
<li><p>keep your Collaborator’s file: <code>git checkout --theirs conflicted_file.Rmd</code></p></li>
<li><p>keep your own file: <code>git checkout --ours conflicted_file.Rmd</code></p></li>
</ul>
<p>Once you have run that command, then run <code>add</code> (staging), <code>commit</code>, <code>pull</code>, and <code>push</code> the changes as normal.</p>
<p><strong>3. Pull and edit the file</strong></p>
<p>Option 2, above, requires the command line, however, we have a third option for resolving the merge conflict from RStudio. Using this approach will allow us to pick and choose some of our changes <em>and</em> some of our Collaborator’s changes by letting us manually edit and fix the conflicted file.</p>
<p>When you <code>pull</code> a file with a conflict, Git will provide you with a warning and modify the file so that it includes both your own changes and your Collaborator’s changes. The file will also appear in the <strong>Git</strong> tab with an orange <code>U</code> icon, which indicates that the file is <code>Unmerged</code> and therefore awaiting your help to resolve the conflict. It delimits these blocks of conflicted code with a series of less than and greater than signs, so they are easy to find:</p>
<p><img src="240410_brown_bag_files/rstudio-merge-conflict.png" class="img-fluid"></p>
<p>In the above example, <code>&lt;&lt;&lt;&lt;&lt;&lt;&lt; HEAD</code> marks the start of <em>your</em> changes. The <code>=======</code> delimiter separates your changes from your Collaborator’s conflicting changes. <code>&gt;&gt;&gt;&gt;&gt;&gt;&gt;</code> marks the end of your Collaborator’s changes.</p>
<p>To resolve the conflicts, simply find all of these blocks, and edit them so that the file looks how you want (either pick your lines, your Collaborator’s lines, some combination, or something altogether new), and save. Be sure to remove the delimiter lines that start with:</p>
<ul>
<li><p><code>&lt;&lt;&lt;&lt;&lt;&lt;&lt;</code></p></li>
<li><p><code>=======</code></p></li>
<li><p><code>>>>>>>></code></p></li>
</ul>
<p>Once you have made those changes, you simply <code>add</code> (staging), <code>commit</code>, and <code>push</code> the files to resolve the conflict.</p>
<p>Clone a Repository from GitHub to R Studio</p>
</section>
</section>
<section id="best-practices-to-avoid-merge-conflicts" class="level2">
<h2 class="anchored" data-anchor-id="best-practices-to-avoid-merge-conflicts"><strong>Best practices to avoid merge conflicts</strong></h2>
<p>Some basic rules of thumb can avoid the vast majority of merge conflicts, saving a lot of time and frustration. These are words our teams live by:</p>
<ul>
<li><p>Communicate often and set up effective communication channels</p></li>
<li><p>Tell each other what you are working on</p></li>
<li><p>Start your working session with a <code>pull</code></p></li>
<li><p><code>Pull</code> immediately after you <code>commit</code> and before you <code>push</code></p></li>
<li><p><code>Commit</code> often in small chunks (this helps you organize your work!)</p></li>
<li><p>Make sure you and who you are collaborating with all <strong>fully</strong> understand the Git workflow you’re using (aka make sure you’re on the same page before you start)!</p></li>
</ul>
<p>A good workflow is encapsulated as follows:</p>
<p><code>Pull -> Edit -> Save -> Add (stage) -> Commit -> Pull -> (OPTIONAL) Fix any merge conflicts -> Push</code></p>
<p>It may take a bit of practice to get comfortable with navigating merge conflicts, but like any other technical skill, they’ll become less intimidating with time. With careful communication and a consistent workflow, conflicts can be largely avoided or resolved when they do occur.</p>
<hr>
</section>
</section>
<section id="organizing-an-r-project" class="level1">
<h1>Organizing an R Project</h1>
<p>When starting a new research project, one of the first things I do is create an R Project for it (just like we have here!). The next step is to then populate that project with relevant directories. There are many tools out there that can do this automatically. Some examples are <code>rrtools</code> or <code>usethis::create_package()</code>. The goal is to organize your project so that it is a compendium of your research. This means that the project has all of the digital parts needed to replicate your analysis, like code, figures, the manuscript, and data access.</p>
<section id="common-directories" class="level2">
<h2 class="anchored" data-anchor-id="common-directories">Common Directories</h2>
<ul>
<li><strong>data</strong>: This is where we store our data. It often contains subdirectories for raw data, processed data, and metadata.</li>
<li><strong>R</strong>: Contains scripts for cleaning or wrangling, etc. If your work includes scripts beyond the R programming language, this directory can be misleading, and you might prefer to call it <code>scripts</code>.</li>
<li><strong>plots</strong> or <strong>figs</strong>: This is for generated plots, graphs, and figures.</li>
<li><strong>docs</strong>: Here, you can put summaries or reports of analysis or other relevant project information.</li>
</ul>
<p>Directory organization will vary from project to project, but the ultimate goal is to create a well-organized project that supports both reproducibility and collaboration.</p>
<p><img src="240410_brown_bag_files/rproj-basic-organization.png" class="img-fluid" width="533"></p>
</section>
</section>
<section id="set-up-a-quarto-document" class="level1">
<h1>Set up a Quarto Document:</h1>
<ul>
<li><p>Open a new Quarto file using the following prompts: File > New File > Quarto Document</p></li>
<li><p>A popup window will appear.</p></li>
<li><p>Give your file a new title, e.g “Introduction to Quarto”.</p></li>
<li><p>Leave the output format as HTML and Engine set to Knitr.</p></li>
<li><p>Then click the “Create” button.</p></li>
</ul>
<p>The first thing to notice is that by opening a file, we see the fourth pane of the RStudio pops up. This is our Quarto document which is essentially a text editor. We also see in the upper left side that we are looking at the document under the “Visual editor”. This is probably a familiar way of looking at a text document. To introduce the <strong>markdown</strong> syntax, we’re going to move to the source editor and then come back to the visual editor. In the upper left corner, click on Source. See how the formatting changed? In the Source editor we are looking at the same text, but in markdown syntax. The visual editor, on the other hand, allows us to see how markdown is rendered, and therefore how it is going to look in our output document.</p>
<section id="render-the-quarto-document" class="level2">
<h2 class="anchored" data-anchor-id="render-the-quarto-document">Render the Quarto document</h2>
<p>Let’s go ahead and render this file by clicking the “Render” button, next to the blue arrow at the top of the Quarto file. When you first click this button, RStudio will prompt you to save this file. Save it in the top level of your home directory on the server, and name it something that you will remember (like <code>quarto-intro.qmd</code>).</p>
<p>Notice how the grey <strong>R code chunks</strong> are surrounded by 3 back-ticks and <code>{r LABEL}</code>. The first chunk, in this case <code>1+1</code>, is evaluated and returns the output number (2). Notice the line in the second chunk that says <code>#| echo: false</code>? This is a code chunk option that indicates not to print the code. In the rendered version, we can see the outcome of <code>2*2</code> but not the executed code that created the outcome.</p>
<p>The table below shows some of the options available for customizing outputs (<a href="https://quarto.org/docs/computations/execution-options.html">Quarto.org</a>).</p>
<table class="table">
<colgroup>
<col style="width: 10%">
<col style="width: 89%">
</colgroup>
<tbody>
<tr class="odd">
<td></td>
<td>Code chunk options</td>
</tr>
<tr class="even">
<td>Option</td>
<td>Description</td>
</tr>
<tr class="odd">
<td><code>#| eval:</code></td>
<td>Evaluate the code chunk (if <code>false</code>, just echos the code into the output).</td>
</tr>
<tr class="even">
<td><code>#| echo:</code></td>
<td>Include the source code in output</td>
</tr>
<tr class="odd">
<td><code>#| warning:</code></td>
<td>Include warnings in the output.</td>
</tr>
<tr class="even">
<td><code>#| error:</code></td>
<td>Include errors in the output (note that this implies that errors executing code will not halt processing of the document).</td>
</tr>
<tr class="odd">
<td><code>#| include:</code></td>
<td>Catch all for preventing any output (code or results) from being included (e.g.<code>include: false</code> suppresses all output from the code block).</td>
</tr>
</tbody>
</table>
<p>Note that you can also combine these options by adding more than one to a code chunk.</p>
</section>
<section id="adding-citations" class="level2">
<h2 class="anchored" data-anchor-id="adding-citations"><strong>Adding citations</strong></h2>
<p>To add a citation, go to the visual editor and in the insert drop down, select “Citation.” In the window that appears, there are several options in the left hand panel for the source of your citation. If you have a citation manager, such as Zotero, installed, this would be included in that list. For now, select “From DOI”, and in the search bar enter a DOI of your choice (e.g.: 10.1038/s41467-020-17726-z), then select “Insert.”</p>
<p>After selecting insert, a couple of things happen. First, the citation reference is inserted into your markdown text as <code>[@oke2020]</code>. Second, a file called references.bib containing the BibTex format of the citation is created. Third, that file is added to the YAML header of your Quarto document (<code>bibliography: references.bib</code>). Adding another citation will automatically update your <code>references.bib</code> file. So easy!</p>
<p>Every time when opening a new Quarto document we should start by deleting all template text (everything except for the YAML). Then we save the document into the most convenient folder of our project. Now we are ready to start our work.</p>
<p>You can create a new chunk in your Quarto in one of these ways:</p>
<ul>
<li><p>Go to Code in the top menu bar, click “Insert Chunk”</p></li>
<li><p>Type by hand <code>{r}</code></p></li>
<li><p>Use the keyboard shortcut</p>
<ul>
<li><p>Mac:<code>command</code> + <code>option</code> + <code>i</code></p></li>
<li><p>Windows: <code>Ctrl</code> + <code>Alt</code> + <code>i</code></p></li>
</ul></li>
</ul>
<section id="about-code-chunks" class="level3">
<h3 class="anchored" data-anchor-id="about-code-chunks"><strong>About code chunks</strong></h3>
<p>Each code chunk needs to have an opening syntax ```<code>{r}</code> and a closing syntax ```. Everything in between these lines will be identified as R code. Let’s start by creating a new R chunk and run the following functions. Because this is just an exploration and we do not want this chunk to be part of our report, we will indicate that by adding <code>#| eval: false</code> and <code>#| echo: false</code> in the setup of the chunk, that way, the code in this chunk will not run and not be displayed when I knit the final document.</p>
<p><strong>Best Practice</strong></p>
<p>It is generally good practice to include all of your <code>library()</code> calls in a single, dedicated R chunk near the top of your document. This lets collaborators know what packages they might need to install before they start running your code.</p>
</section>
</section>
</section>
<section id="reproducible-papers-with-rrtools" class="level1">
<h1><strong>Reproducible Papers with <code>rrtools</code></strong></h1>
<p>A great overview of this approach to reproducible papers comes from:</p>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p>Ben Marwick, Carl Boettiger & Lincoln Mullen (2018) <strong>Packaging Data Analytical Work Reproducibly Using R (and Friends)</strong>, The American Statistician, 72:1, 80-88, <a href="https://doi.org/10.1080/00031305.2017.1375986">doi:10.1080/00031305.2017.1375986</a></p>
</div>
</div>
<p>The key idea in Marwick et al. (2018) is that of the <em>research compendium</em>: A single container for not just the journal article associated with your research but also the underlying analysis, data, and even the software environment required to reproduce your work.</p>
<p>Research compendium makes it easy for researchers to do their work but also for others to inspect or even reproduce the work because all necessary materials are readily at hand due to being kept in one place. Rather than a constrained set of rules, the research compendium is a scaffold upon which to conduct reproducible research using open science tools such as:</p>
<ul>
<li><p><a href="https://www.r-project.org/">R</a></p></li>
<li><p><a href="https://rmarkdown.rstudio.com/">RMarkdown</a></p></li>
<li><p><a href="https://quarto.org/">Quarto</a></p></li>
<li><p><a href="https://git-scm.com/">git</a> and <a href="https://github.com/">GitHub</a></p></li>
</ul>
<p>Fortunately for us, Ben Marwick (and others) have written an R package called <a href="https://github.com/benmarwick/rrtools">rrtools</a> that helps us create a research compendium from scratch.</p>
<p>Let’s explore the structure <code>rrtools</code> has put in place for us. Inside the analysis folder we have 5 folders. Different parts of our project will go into these different folders. Our data into the <code>data</code> folder, when the time comes to save any figure, we should save them into the <code>figures</code> folder, and so on.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="240410_brown_bag_files/compendia.png" class="img-fluid figure-img" width="751"></p>
<figcaption class="figure-caption">Research compendia from Marwick et al.</figcaption>
</figure>
</div>
<p>You’ll notice an <code>analysis/templates</code> directory that contains journal citation style language (CSL) files which set the style of citations and reference list for the journal (the Journal of Archaeological Science, in this example). The template.Rmd renders into the template.docx. This document is called in the paper.qmd YAML to style the output of the paper created in paper.qmd.</p>
<p>What if I want a template from another journal, different from the Journal of Archaeological Science? We can create other journals’ templates with the <code>rticles</code> package. This package will provide the templates and necessary information to render your paper in the journal of your choice (note: not all journals are in the <code>rticles</code> package). With that in mind, we will delete the existing <code>paper</code> directory and create a new one shortly.</p>
<p><strong>Additional information on RMarkdown templates with <code>rticles</code> can be found here <a href="https://learning.nceas.ucsb.edu/2024-02-arctic/session_20.html#rmarkdown-templates-with-rticles" class="uri">https://learning.nceas.ucsb.edu/2024-02-arctic/session_20.html#rmarkdown-templates-with-rticles</a></strong></p>
<section id="workflow-in-a-nutshell" class="level3">
<h3 class="anchored" data-anchor-id="workflow-in-a-nutshell"><strong>Workflow in a nutshell</strong></h3>
<div class="callout callout-style-default callout-important callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Important
</div>
</div>
<div class="callout-body-container callout-body">
<p><strong>Summary</strong></p>
<ul>
<li><p>Use <code>rrtools</code> to generate the core directory layout and approach to data handling.</p></li>
<li><p>Then use <code>rticles</code> to create the structure of the paper itself. The combination is incredibly flexible.</p></li>
</ul>
</div>
</div>
<p>Things we can do with our research compendium:</p>
<ul>
<li><p>Edit <code>./analysis/paper/paper.Rmd</code> to begin writing your paper and your analysis in the same document</p></li>
<li><p>Add any citations to <code>./analysis/paper/pnas-sample.bib</code></p></li>
<li><p>Add any longer R scripts that don’t fit in your paper in an <code>R</code> folder at the top level</p></li>
<li><p>Add raw data to <code>./data/raw_data</code></p></li>
<li><p>Write out any derived data (generated in <code>paper.Rmd</code>) to <code>./data/derived_data</code></p></li>
<li><p>Write out any figures in <code>./analysis/figures</code></p></li>
</ul>
<p>You can then write all of your R code in your RMarkdown/Quarto document, and generate your manuscript all in the format needed for your journal (using its .csl file, stored in the paper directory).</p>
</section>
<section id="adding-renv-to-conserve-your-environment" class="level3">
<h3 class="anchored" data-anchor-id="adding-renv-to-conserve-your-environment"><strong>Adding <code>renv</code> to conserve your environment</strong></h3>
<ul>
<li><p><code>rrtools</code> has a couple more tricks up its sleeve to help your compendium be as reproducible and portable as possible.</p></li>
<li><p>To capture the R packages and versions this project depends on, we can use the <code>renv</code> package.</p></li>
<li><p>Running <code>renv::init()</code> will initiate tracking of the R packages in your project.</p></li>
<li><p>This action will create a new folder called <code>renv</code> in your top directory.</p></li>
<li><p><code>renv::init()</code> automatically detects dependencies in your code (by looking for library calls, at the DESCRIPTION file, etc.) and installs them to a private project specific library. This means that your project <code>mypaper</code> can use a different version of <code>dplyr</code> than another project which may need an older version without any hassle.</p></li>
<li><p><code>renv</code> also writes the package dependencies to a special file in the repository called <code>renv.lock</code>.</p></li>
<li><p>If any of the packages you are using is updated while you are working on your project, you can run <code>renv::snapshot()</code> to update the <code>renv.lock</code> file and your project-installed packages.</p></li>
<li><p>You can read the <code>renv.lock</code> file using <code>renv::restore()</code>, when needed. This will install the versions of the packages needed.</p></li>
</ul>
</section>
<section id="conserve-your-computational-environment-with-docker" class="level2">
<h2 class="anchored" data-anchor-id="conserve-your-computational-environment-with-docker"><strong>Conserve your computational environment with Docker</strong></h2>
<ul>
<li><p>The <code>rrtools</code> package then uses this <code>renv.lock</code> file to build what is called a Dockerfile.</p></li>
<li><p><a href="http://www.docker.com/"><strong>Docker</strong></a> <strong>allows you to build containers, a standard unit of software that packages up code and all its dependencies so an application runs quickly and reliably from one computing environment to another.</strong></p></li>
<li><p>A container is an “image” of all the software specified, and this image can be run on other computers such that the software stack looks exactly as you specify.</p></li>
<li><p>This is important when it comes to reproducibility, because when running someone else’s code, you may get different results or errors if you are using different versions of software (like an old version of <code>dplyr</code>).</p></li>
<li><p>A Dockerfile contains the instructions for how to recreate the computational environment where your analysis was run.</p></li>
</ul>
<p><strong>In practice</strong></p>
<ul>
<li><p>Once you have your research compendium, you can call <code>rrtools::use_dockerfile()</code>. If needed, re-install <code>rrtools</code> directly from GitHub with <code>remotes::install_github("benmarwick/rrtools")</code>.</p></li>
<li><p>This first creates a Dockerfile that loads a standard image for using R with the tidyverse,</p></li>
<li><p>And then has more instructions for how to create the environment so that it has the very specific R packages and versions you need.</p></li>
<li><p>If we look at the Dockerfile (example below), it calls to <code>renv::restore()</code>, as described above.</p></li>
<li><p>The last line of the Dockerfile renders our Quarto/RMarkdown reproducible paper!</p></li>
</ul>
<pre><code># get the base image, the rocker/verse has R, RStudio and pandoc
FROM rocker/verse:4.2.2
# required
MAINTAINER Your Name <[email protected]>
COPY . /<REPO>
# go into the repo directory
RUN . /etc/environment \
# Install linux depedendencies here
# e.g. need this for ggforce::geom_sina
&& sudo apt-get update \
&& sudo apt-get install libudunits2-dev -y \
# build this compendium package
&& R -e "install.packages('remotes', repos = c(CRAN = 'https://cloud.r-project.org'))" \
&& R -e "remotes::install_github(c('rstudio/renv', 'quarto-dev/quarto-r'))" \
# install pkgs we need
&& R -e "renv::restore()" \
# render the manuscript into a docx, you'll need to edit this if you've
# customised the location and name of your main qmd file
&& R -e "quarto::quarto_render('/<REPO>/analysis/paper/paper.qmd')"</code></pre>
<ul>
<li><p>After running <code>rrtools::use_dockerfile()</code>, the package also sets up GitHub Actions for you.</p></li>
<li><p>Actions are processes that are triggered by GitHub events (like a push) and run automatically.</p></li>
<li><p>In this case, the Action that is set up will build your Docker image on GitHub.</p></li>
<li><p>This means that the code that knits your paper is run, and an updated version of your paper is knit.</p></li>
<li><p>This is called <strong>continuous integration,</strong> and is extremely convenient for developing products like this, since the build step can be taken care of automatically as you push to your repository.</p></li>
</ul>
</section>
</section>
<section id="and-thats-all-the-information-i-thought-i-could-share-today" class="level1">
<h1>And that’s all the information I thought I could share today!</h1>
<p>Thank you for attending my TED Talk.</p>
<p>-Rebecca</p>
<hr>
<section id="additional-quarto-resources" class="level2">
<h2 class="anchored" data-anchor-id="additional-quarto-resources">Additional Quarto Resources:</h2>
<ul>
<li><p>Posit (the organization that developed Quarto) has great documentation, check out <a href="https://quarto.org/">Quarto.org</a></p></li>
<li><p>R for Data Science (2e) (Wickham et al., 2023): this is an awesome book for all R-related things. Chapters <a href="https://r4ds.hadley.nz/quarto.html">29 and 30</a> are specific to Quarto.</p></li>
<li><p><a href="https://quarto.org/docs/gallery/">Quarto Gallery:</a> Example of different outputs created using Quarto</p></li>
<li><p><a href="https://openscapes.org/blog/2022-08-10-quarto-keynote/">Hello Quarto: share, collaborate, teach, reimagine</a>. A talk by Julia Stewart Lowndes and Mine Çetinkaya-Rundel.</p></li>
</ul>
</section>
<section id="git-resources" class="level2">
<h2 class="anchored" data-anchor-id="git-resources">Git Resources:</h2>
<ul>
<li><p><a href="https://git-scm.com/book/en/v2">Pro Git Book</a></p></li>
<li><p><a href="https://happygitwithr.com/">Happy Git and GitHub for the useR</a></p></li>
<li><p><a href="https://docs.github.com/en/get-started/quickstart/set-up-git">GitHub Documentation</a></p></li>
<li><p><a href="https://learngitbranching.js.org/">Learn Git Branching</a> is an interactive tool to learn Git on the command line</p></li>
<li><p><a href="https://swcarpentry.github.io/git-novice/">Software Carpentry Version Control with Git</a></p></li>
</ul>
<p>Bitbucket’s tutorials on <a href="https://www.atlassian.com/git/tutorials/comparing-workflows">Git Workflows</a></p>
</section>
<section id="reproducible-research-resources" class="level2">
<h2 class="anchored" data-anchor-id="reproducible-research-resources">Reproducible Research Resources:</h2>
<ul>
<li><p><a href="https://github.com/benmarwick/rrtools"><code>rrtools</code> documentation</a></p></li>
<li><p>The <a href="https://github.com/rstudio/rticles"><code>rticles</code></a> package</p></li>
<li><p><a href="https://usethis.r-lib.org/"><code>usethis</code> documentation</a></p></li>
</ul>
<p>Trisovic, Ana, Matthew K. Lau, Thomas Pasquier, and Mercè Crosas. 2022. “A Large-Scale Study on Research Code Quality and Execution.” <em>Scientific Data</em> 9 (1). <a href="https://doi.org/10.1038/s41597-022-01143-6" class="uri">https://doi.org/10.1038/s41597-022-01143-6</a>.</p>
</section>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror the stylesheet's colour mode onto <body>: a `data-mode` of "dark"
// yields the `quarto-dark` class, anything else yields `quarto-light`. The
// opposite class is removed so only one of the two is ever present.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const { classList } = window.document.querySelector("body");
  classList.add(isDark ? "quarto-dark" : "quarto-light");
  classList.remove(isDark ? "quarto-light" : "quarto-dark");
};
// Sync the body colour class with the primary Bootstrap stylesheet
// (`link#quarto-bootstrap`). Does nothing when that link is not on the page.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// Apply the light/dark body class once, as soon as the DOM is ready.
toggleBodyColorPrimary();
// Icon string handed to AnchorJS through its options below.
const icon = "";
// Create the AnchorJS instance, configure it to place anchors on the right,
// then attach anchors to every element carrying the `anchored` class.
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
// True when any class on `el` starts with the `code-annotation-` prefix.
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
// Wire ClipboardJS to every `.code-copy-button`. The copied text is the
// inner text of the element immediately preceding the button, with any
// code-annotation child elements stripped out first. The work is done on a
// clone, so the rendered page is never modified.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  text: function(trigger) {
    const codeEl = trigger.previousElementSibling.cloneNode(true);
    // `children` is a live HTMLCollection: removing a node while iterating it
    // directly shifts the remaining indices, so the sibling right after each
    // removed node would be skipped. Iterate over a static snapshot instead.
    for (const childEl of Array.from(codeEl.children)) {
      if (isCodeAnnotation(childEl)) {
        childEl.remove();
      }
    }
    return codeEl.innerText;
  }
});
// After a successful copy: flash the "checked" state on the button, show a
// temporary "Copied!" title (plus a Bootstrap tooltip when Bootstrap is
// loaded), and restore everything one second later.
clipboard.on('success', (e) => {
  const copyButton = e.trigger;
  // Drop focus so the button does not stay highlighted.
  copyButton.blur();
  copyButton.classList.add('code-copy-button-checked');

  // Remember the current title so it can be put back after the flash.
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  let tooltip;
  if (window.bootstrap) {
    copyButton.setAttribute("data-bs-toggle", "tooltip");
    copyButton.setAttribute("data-bs-placement", "left");
    copyButton.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tooltip.show();
  }

  const restoreButton = () => {
    if (tooltip) {
      tooltip.hide();
      copyButton.removeAttribute("data-bs-title");
      copyButton.removeAttribute("data-bs-toggle");
      copyButton.removeAttribute("data-bs-placement");
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // Clear the text selection ClipboardJS made in order to copy.
  e.clearSelection();
});
// Attach a Quarto-themed Tippy popup to `el`. `contentFn` is passed through
// as the `content` option; the popup is appended to the reference element's
// parent and is interactive.
function tippyHover(el, contentFn) {
  window.tippy(el, {
    allowHTML: true,
    content: contentFn,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  });
}
// Give every footnote reference a hover popup showing the footnote's HTML.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // Absolute URLs are reduced to their hash. `new URL` throws for values it
    // cannot parse on its own (e.g. a bare "#fn1"); in that case the
    // attribute value is deliberately used as-is.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // A reference whose target footnote is missing from the page must not
    // throw here; fall back to empty content instead.
    return note ? note.innerHTML : "";
  });
}
// Annotation element whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
// Build the CSS selector for the <span> that carries the given code cell id
// and annotation number in its data attributes.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlight the code lines that belong to the annotation `annoteEl`.
// Reads the target cell and annotation from the element's data attributes,
// looks up the matching annotation <span> to get its comma-separated line
// list, and stretches two absolutely positioned overlays (one over the
// code, one in the cell's gutter) from the first to the last listed line.
// Finally records `annoteEl` as the current selection.
const selectCodeLines = (annoteEl) => {
  const cellId = annoteEl.getAttribute("data-target-cell");
  const annotationId = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(cellId, annotationId));
  const lineIds = annoteSpan
    .getAttribute("data-code-lines")
    .split(",")
    .map((line) => cellId + "-" + line);
  if (lineIds.length === 0) {
    return;
  }

  // Vertical extent: starts at the first line, ends at the bottom of the last.
  const firstLine = window.document.getElementById(lineIds[0]);
  const top = firstLine.offsetTop;
  let height = firstLine.offsetHeight;
  const parent = firstLine.parentElement.parentElement;
  if (lineIds.length > 1) {
    const lastLine = window.document.getElementById(lineIds[lineIds.length - 1]);
    height = lastLine.offsetTop + lastLine.offsetHeight - top;
  }

  if (top !== null && height !== null && parent !== null) {
    // Reuse the overlay with this id if it exists, otherwise create it and
    // let `mount` insert it into the page; then (re)position it.
    const placeOverlay = (overlayId, mount) => {
      let overlay = window.document.getElementById(overlayId);
      if (overlay === null) {
        overlay = window.document.createElement("div");
        overlay.setAttribute("id", overlayId);
        overlay.style.position = 'absolute';
        mount(overlay);
      }
      // 2px of padding above and below the highlighted lines.
      overlay.style.top = top - 2 + "px";
      overlay.style.height = height + 4 + "px";
    };

    placeOverlay("code-annotation-line-highlight", (overlay) => {
      parent.appendChild(overlay);
    });
    placeOverlay("code-annotation-line-highlight-gutter", (overlay) => {
      const codeCell = window.document.getElementById(cellId);
      codeCell.querySelector('.code-annotation-gutter').appendChild(overlay);
    });
  }
  selectedAnnoteEl = annoteEl;
};
// Remove both highlight overlays (code and gutter) if they are present and
// clear the record of the currently selected annotation.
const unselectCodeLines = () => {
  for (const overlayId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const overlay = window.document.getElementById(overlayId);
    if (overlay) {
      overlay.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
// Make every annotation <dt> a toggle: clicking one highlights its code
// lines and marks it active; clicking the already-selected one clears the
// highlight again.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDt) => {
  annoteDt.addEventListener('click', (event) => {
    const clickedEl = event.target;

    if (clickedEl === selectedAnnoteEl) {
      // Second click on the current selection: unselect the lines.
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }

    // Switching selection: clear the old highlight and active marker first.
    unselectCodeLines();
    const previouslyActive = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (previouslyActive) {
      previouslyActive.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
const findCites = (el) => {