From 7ade4b725f7cabfa2f35be000b93751814d0cbd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Wed, 11 Oct 2023 11:58:36 +0200 Subject: [PATCH] Update freiburg.bib --- _bibliography/freiburg.bib | 83 +++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/_bibliography/freiburg.bib b/_bibliography/freiburg.bib index b97672bd2..ac8d65e4a 100644 --- a/_bibliography/freiburg.bib +++ b/_bibliography/freiburg.bib @@ -4,6 +4,79 @@ # Publications +@article{Mehta2023, + doi = {10.1080/14789450.2023.2265062}, + url = {https://doi.org/10.1080/14789450.2023.2265062}, + year = {2023}, + month = oct, + publisher = {Informa {UK} Limited}, + pages = {1--16}, + author = {Subina Mehta and Matthias Bernt and Matthew Chambers and Matthias Fahrner and Melanie Christine F{\"o}ll and Bjoern Gruening and Carlos Horro and James E. Johnson and Valentin Loux and Andrew T. Rajczewski and Oliver Schilling and Yves Vandenbrouck and Ove Johan Ragnar Gustafsson and W. C. Mike Thang and Cameron Hyde and Gareth Price and Pratik D. Jagtap and Timothy J. 
Griffin}, + title = {A {Galaxy} of informatics resources for {MS}-based proteomics}, + journal = {Expert Review of Proteomics} +} + +@article{McCartney2023.09.25.559365, + author = {Mc Cartney, Ann M and Giulio Formenti and Alice Mouton and De Panis, Diego and Lu{\'\i}sa S Marins and Henrique G Leit{\~a}o and Genevieve Diedericks and Joseph Kirangwa and Marco Morselli and Judit Salces-Ortiz and Nuria Escudero and Alessio Iannucci and Chiara Natali and Hannes Svardal and Rosa Fern{\'a}ndez and De Pooter, Tim and Geert Joris and Mojca Strazisar and Jo Wood and Katie E Herron and Ole Seehausen and Phillip C Watts and Felix Shaw and Robert P Davey and Alice Minotto and Jos{\'e} M Fern{\'a}ndez and Astrid B{\"o}hne and Carla Alegria and Tyler Alioto and Paulo C Alves and Isabel R Amorim and Jean-Marc Aury and Niclas Backstrom and Petr Baldrian and Laima Baltrunaite and Endre Barta and Bertrand Bed{\textquoteright}Hom and Caroline Belser and Johannes Bergsten and Laurie Bertrand and Helena Biland{\v z}ija and Mahesh Binzer-Panchal and Iliana Bista and Mark Blaxter and Paulo AV Borges and Guilherme Borges Dias and Mirte Bosse and Tom Brown and R{\'e}my Bruggmann and Elena Buena-Atienza and Josephine Burgin and Elena Buzan and Nicolas Casadei and Matteo Chiara and Sergio Chozas and {\v C}iampor, Jr., Fedor 
and Angelica Crottini and Corinne Cruaud and Fernando Cruz and Love Dalen and De Biase, Alessio and Javier del Campo and Teo Deli{\'c} and Alice B Dennis and Martijn FL Derks and Maria Angela Diroma and Mihajla Djan and Simone Duprat and Klara Eleftheriadi and Philine GD Feulner and Jean-Fran{\c c}ois Flot and Giobbe Forni and Bruno Fosso and Pascal Fournier and Christine Fournier-Chambrillon and Toni Gabaldon and Shilpa Garg and Carmela Gissi and Luca Giupponi and J{\`e}ssica G{\'o}mez-Garrido and Josefa Gonz{\'a}lez and Miguel L Grilo and Bjoern Gruening and Thomas Gu{\'e}rin and Nad{\`e}ge Guiglielmoni and Marta Gut and Marcel P Haesler and Christoph Hahn and Balint Halpern and Peter Harrison and Julia Heintz and Maris Hindrikson and Jacob H{\"o}glund and Kerstin Howe and Graham Hughes and Benjamin Istace and Mark J. Cock and Franc Jancekovic and Zophon{\'\i}as O J{\'o}nsson and Sagane Joye-Dind and Janne J. Koskimaki and Boris Krystufek and Justyna Kubacka and Heiner Kuhl and Szilvia Kusza and Karine Labadie and Meri Lahteenaro and Henrik Lantz and Anton Lavrinienko and Lucas Lecl{\`e}re and Ricardo Jorge Lopes and Ole Madsen and Ghislaine Magdelenat and Giulia Magoga and Tereza Manousaki and Tapio Mappes and Jo{\~a}o P Marques and Gemma I Martinez Redondo and Florian Maumus and Hendrik-Jan Megens and Jos{\'e} Melo-Ferreira and Sofia L Mendes and Matteo Montagna and Jo{\~a}o Moreno and Mai-Britt Mosbech and Monica Moura and Zuzana Musilova and Eugene Myers and Will J. 
Nash and Alexander Nater and Pamela Nicholson and Manuel Niell and Reindert Nijland and Benjamin Noel and Karin Nor{\'e}n and Pedro H Oliveira and Remi-Andre Olsen and Lino Ometto and Stephan Ossowski and Vaidas Palinauskas and Sn{\ae}bj{\"o}rn P{\'a}lsson and Jerome P Panibe and Joana Paup{\'e}rio and Martina Pavlek and Emilie Payen and Julia Paw{\l}owska and Jaume Pellicer and Graziano Pesole and Joao Pimenta and Martin Pippel and Anna Maria Pirttil{\"a} and Nikos Poulakakis and Jeena Rajan and Ruben MC Rego and Roberto Resendes and Philipp Resl and Ana Riesgo and Patrik R{\"o}din-M{\"o}rch and Andr{\'e} ER Soares and Carlos Rodr{\'\i}guez Fernandes and Maria M. Romeiras and Guilherme Roxo and Lukas Ruber and Mar{\'\i}a Jos{\'e} Ruiz-L{\'o}pez and Urmas Saarma and Luis P Silva and Manuela Sim-Sim and Lucile Soler and Vitor C Sousa and Carla Sousa Santos and Alberto Spada and Milomir Stefanovi{\'c} and Viktor Steger and Josefin Stiller and Matthias St{\"o}ck and Torsten Hugo H Struck and Hiranya Sudasinghe and Riikka Tapanainen and Christian Tellgren-Roth and Helena Trindade and Yevhen Tukalenko and Ilenia Urso and Benoit Vacherie and Van Belleghem, Steven M and Kees van Oers and Carlos Vargas-Chavez and Nevena Velickovic and Noel Vella and Adriana Vella and Cristiano Vernesi and Sara Vicente and Sara Villa and Olga Vinnere Pettersson and Filip AM Volckaert and Judit V{\"o}r{\"o}s and Patrick Wincker and Sylke Winkler and Claudio Ciofi and Robert M Waterhouse and Camila J Mazzoni}, + title = {The {European Reference Genome Atlas}: piloting a decentralised approach to equitable biodiversity genomics}, + elocation-id = {2023.09.25.559365}, + year = {2023}, + doi = {10.1101/2023.09.25.559365}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {A global genome database of all of Earth{\textquoteright}s species diversity could be a treasure trove of scientific discoveries. 
However, regardless of the major advances in genome sequencing technologies, only a tiny fraction of species have genomic information available. To contribute to a more complete planetary genomic database, scientists and institutions across the world have united under the Earth BioGenome Project (EBP), which plans to sequence and assemble high-quality reference genomes for all \~{}1.5 million recognized eukaryotic species through a stepwise phased approach. As the initiative transitions into Phase II, where 150,000 species are to be sequenced in just four years, worldwide participation in the project will be fundamental to success. As the European node of the EBP, the European Reference Genome Atlas (ERGA) seeks to implement a new decentralised, accessible, equitable and inclusive model for producing high-quality reference genomes, which will inform EBP as it scales. To embark on this mission, ERGA launched a Pilot Project to establish a network across Europe to develop and test the first infrastructure of its kind for the coordinated and distributed reference genome production on 98 European eukaryotic species from sample providers across 33 European countries. Here we outline the process and challenges faced during the development of a pilot infrastructure for the production of reference genome resources, and explore the effectiveness of this approach in terms of high-quality reference genome production, considering also equity and inclusion. 
The outcomes and lessons learned during this pilot provide a solid foundation for ERGA while offering key learnings to other transnational and national genomic resource projects.Competing Interest StatementThe authors have declared no competing interest.Biodiversity genomicsThe application of genomic methods to research biodiversity.BUSCOA bioinformatic method (Benchmarking Universal Single-Copy Orthologues) used to estimate the completeness of the coding fraction of an organism{\textquoteright}s genome based on the proportion of (lineage specific) single copy orthologous genes that are found in a genome assembly 51.INSDCInternational Nucleotide Sequence Database Collaboration (https://www.insdc.org/) is an initiative between the DDBJ, EMBL-EBI and NCBI that together act as a global repository of sequence data and associated metadata, and provide tools and services that allow access to genomic resources.Reference genomeAn accepted standard representation of an organism{\textquoteright}s DNA sequence. High-quality reference genomes typically have high completeness (chromosome-level with few gaps in sequence), few errors, and are annotated and accessible. A reference genome serves as a tool for alignment-based analyses, such as variant calling or RNAseq, and has many other applications, for example, phylogenetics and evolutionary relationships, identification of genes and variants, functional analysis and comparative genomics. 
Reference genomes referred to as {\textquotedblleft}drafts{\textquotedblright} are those that are under active construction and refinement, and not yet finalised through manual curation.Genomic resourceA genomic resource, for the purpose of this manuscript, refers to a reference genome, genome annotation, voucher specimen, cryopreserved sample and comprehensive metadata.FAIR PrinciplesA set of principles to guide appropriate management and curation of scientific data (https://www.go-fair.org/fair-principles/) that emphasise data accessibility and use by ensuring that data are Findable, Accessible, Interoperable, and Reusable. Due to the increasing amount of scientific data being reposited, FAIR guidelines promote a data format that is amenable to automated computational access of data by stakeholders63.CARE PrinciplesThe CARE principles for Indigenous data governance (https://www.gida-global.org/care) provide a governance framework that supports the recognition of rights and interests Indigenous Peoples{\textquoteright} to their physical and digital data as well as their Indigenous Knowledges64.MetadataA collection of data that provides contextual information about multiple characteristics of other, corresponding original data.VoucherA voucher specimen is a permanently preserved object (either whole or in part, and/or physical or digital) of an identified organism (verified by a recognised expert) and which is deposited in an accessible facility or database. A voucher provides physical evidence about any specimen{\textquoteright}s taxonomic identity14. Voucher deposition is a best practice for conducting biodiversity genomics research.(Genome) annotationThe process of identifying the functions of different pieces of a genome. This includes genes that code for proteins and non coding features (e.g. intron-exon structure of protein coding genes, promotors, transposable elements). 
Typically performed using computational methods, followed by manual curation.(Genome) completenessAn estimate of how well a reference genome represents the complete sequence of the target organism. A complete genome should equal the haploid genome size of the target, but may be defined when {\textquoteleft}all chromosomes are gapless and have no runs of 10 or more ambiguous bases, there are no unplaced or unlocalized scaffolds, and all expected chromosomes are present.{\textquoteright} (https://www.ncbi.nlm.nih.gov/assembly/). There are different approaches to estimate the completeness, like BUSCO, analysing K-mers, etc.LibraryDNA, cDNA, or RNA that has been prepared for NGS within (usually) a specific size range and containing adapters, which are designed to be appropriate for (a) specific sequencing platform(s).(Genome) assemblyA genome assembly is a representation of an organism{\textquoteright}s genome that is made using computer programs to turn (assemble) raw sequence data into longer, continuous sequences.PUIDA permanent unique identifier is a unique label for an object that does not change, such as the Digital Object Identifier (DOI) attached with a scientific publication.ENAThe European Nucleotide Archive (https://www.ebi.ac.uk/ena) is a global repository for sequence data and provides resources that support management and access to sequence data.Equity DeservingAccording to the Canadian Council (https://canadacouncil.ca/glossary/equity-seeking-groups) equity deserving groups are those individual researchers, communities, Peoples, regions or countries that have identified barriers to equal access, opportunities, and resources due to disadvantage and/or discrimination and that are actively seeking, and deserving of social justice and reparation. 
The discrimination experienced could be caused by attitudinal, historic, social, and environmental barriers that could be based on a plethora of characteristics that are including (but not limited to) sex, age, ethnicity, disability, economic status, gender, gender expression, nationality, race, sexual orientation, and creed.COPOThe Collaborative OPen Omics (COPO) platform is for researchers to publish their research assets, providing metadata annotation and deposition capability. It allows researchers to describe their datasets according to community standards and broker the submission of such data to appropriate repositories whilst tracking the resulting accessions/identifiers28.Open dataOpen data are freely accessible and unrestricted data that can be accessed, used, reused and shared with third parties for any purpose.HSMHierarchical Storage Management is both a data management and data storage technique which transparently manages the movement of data between the different layers of a tiered storage based on file size thresholds, usage and I/O pressure. Usually, a tiered storage is composed of one or more layers of disk arrays, ordered by capacity, latency, redundancy and storage cost. A slow but economically effective archival layer is at the bottom, composed of magnetic tape libraries and automated tape robots, with the highest capacity and latency. The movement between layers is automatically triggered.ONTOxford Nanopore Technologies (ONT; https://nanoporetech.com/) is a next generation sequencing technology whereby sequence data are generated from the changes in current that occur as single-stranded DNA or RNA molecules pass through nanoscale protein pores (nanopores). 
ONT provides long read data (up to several megabases) that facilitate genome assembly65,66.PacBioPacific Biosciences (PacBio; https://www.pacb.com/) is a single-molecule, real time (SMRT) next generation sequencing technology in which sequence data are generated by fluorescent light emission that occurs when a DNA polymerase adds nucleotides. PacBio produces long read data (tens of kilobases) that facilitate genome assembly.HiFi readsHiFi (High Fidelity) PacBio reads are produced by taking multiple sequences of the same molecule to provide a consensus sequence that is usually 12-20kbp long and has a low error rate ({\textgreater}99.9 \% consensus accuracy)67.Hi-CSequencing-based method used to study three-dimensional interactions among chromatin regions by measuring the frequency of contact between pairs of loci. Since contact frequency is related to the distance between a pair of loci, Hi-C linking information is used to help with scaffolding stages during a genome assembly process.Hi-C map / graph productionThe occurrence and frequency of Hi-C contacts are analysed and used in assembly scaffolding. They are typically visualised in Hi-C 2D heatmaps with the full genome sequence on the X and Y axis and a markup for each observed contact.Omni-CModified version of Hi-C that uses a sequence-independent endonuclease during its protocol to produce more even sequence coverage increasing overall resolution.RNA-SeqRNA-Seq is a technique that determines the complete or partial RNA sequence using NGS. The RNA expression profiles vary in different tissues of the same organism and can be influenced by physiopathological circumstances. RNA-Seq data facilitate genome assembly by providing empirical evidence for annotation of transcribed regions68.IsoSeqThis is a sequencing protocol developed by PacBio that aims to sequence full-length transcripts using the accurate, long read capabilities of PacBio HiFi technology. 
IsoSeq data facilitate analysis of transcriptomes and genome annotation by identifying full-length isoforms of transcripts.HaplotypeA haplotype refers to the collection of genetic material within an organism that is inherited together. Haplotype may be used to describe a few loci or any number of chromosomes (a chromosome-scale haplotype).K-merA K-mer is a DNA sequence of length k; for example, the sequence AGCT contains the 3-mers (K-mers of length 3) AGC and GCT.TranscriptomeA transcriptome is a set of aligned RNAseq reads representing RNA collected from a sample or collection of samples. This includes both protein-coding and non-coding transcripts. For the ERGA Pilot Project, poly-A+ transcripts were profiled.Interested PartiesThis term, for the purposes of this manuscript refers to the range of external stakeholders (e.g., commercial companies, policymakers etc) and rights holders (e.g., Indigenous Peoples) that have an interest in biodiversity genomics research.EBP Genome assembly quality standard 6.C.Q40Minimum reference standard of 6.C.Q40, i.e. megabase N50 contig continuity and chromosomal scale N50 scaffolding, with less than 1/10,000 error rate. For species with chromosome N50 smaller than a megabase this will be C.C.Q40. Additional recommendations include K-mer completeness {\textgreater}90\%, BUSCO complete single-copy single {\textgreater}90\%, BUSCO complete single duplicate {\textless} 5\%, and Gaps/Gbp {\textless}1000.Widening CountryWidening countries are countries with low participation rates in FP7 and H2020 projects (low level of investment into research and innovation (R\&I)). 
According to the Horizon Europe regulation the Widening countries are: Bulgaria, Croatia, Cyprus, Czech republic, Estonia, Greece, Hungary, Latvia, Lithuania, Malta, Poland, Portugal, Romania, Slovakia, Slovenia and all associated countries with equivalent characteristics in terms of R\&I performance and the Outermost Regions.}, + URL = {https://www.biorxiv.org/content/early/2023/09/30/2023.09.25.559365}, + eprint = {https://www.biorxiv.org/content/early/2023/09/30/2023.09.25.559365.full.pdf}, + journal = {bioRxiv} +} + +@article{Hrdtner2023, + doi = {10.1016/j.atherosclerosis.2023.03.006}, + url = {https://doi.org/10.1016/j.atherosclerosis.2023.03.006}, + year = {2023}, + month = apr, + publisher = {Elsevier {BV}}, + volume = {371}, + pages = {1--13}, + author = {Carmen H{\"a}rdtner and Anup Kumar and Carolin A. Ehlert and Tamara Antonela Vico and Christopher Starz and Alexander von Ehr and Katja Krebs and Bianca Dufner and Natalie Hoppe and Peter Stachon and Timo Heidt and Dennis Wolf and Constantin von zur M{\"u}hlen and Bj{\"o}rn Gr{\"u}ning and Clinton S. 
Robbins and Lars Maegdefessel and Dirk Westermann and Tsai-Sang Dederichs and Ingo Hilgendorf}, + title = {A comparative gene expression matrix in {Apoe}-deficient mice identifies unique and atherosclerotic disease stage-specific gene regulation patterns in monocytes and macrophages}, + journal = {Atherosclerosis} +} + +@article{Rahman2023, + doi = {10.1101/2023.04.19.537514}, + url = {https://doi.org/10.1101/2023.04.19.537514}, + year = {2023}, + month = apr, + publisher = {Cold Spring Harbor Laboratory}, + author = {Nadim Rahman and Colman O'Cathail and Ahmad Zyoud and Alexey Sokolov and Oude Munnink, Bas and Bj{\"o}rn Gr{\"u}ning and Carla Cummins and Clara Amid and David Nieuwenhuijse and D{\'{a}}vid Visontai and David Yu Yuan and Dipayan Gupta and Divyae Prasad and G{\'{a}}bor M{\'{a}}t{\'{e}} Guly{\'{a}}s and Gabriele Rinck and Jasmine McKinnon and Jeena Rajan and Jeff Knaggs and Jeffrey Edward Skiby and J{\'{o}}zsef St{\'{e}}ger and Judit Szarvas and Khadim Gueye and Kriszti{\'{a}}n Papp and Maarten Hoek and Manish Kumar and Marianna Ventouratou and Marie-Catherine Bouquieaux and Martin Koliba and Milena Mansurova and Muhammad Haseeb and Nathalie Worp and Peter W. Harrison and Rasko Leinonen and Ross Thorne and Sandeep Selvakumar and Sarah Hunt and Sundar Venkataraman and Suran Jayathilaka and Timoth{\'{e}}e Cezard and Wolfgang Maier and Zahra Waheed and Zamin Iqbal and Frank M{\o}ller Aarestrup and Istvan Csabai and Marion Koopmans and Tony Burdett and Guy Cochrane}, + title = {Mobilisation and analyses of publicly available {SARS}-{CoV}-2 data for pandemic responses} +} + +@article{Guerler2023, + doi = {10.1186/s12859-023-05389-8}, + url = {https://doi.org/10.1186/s12859-023-05389-8}, + year = {2023}, + month = jun, + publisher = {Springer Science and Business Media {LLC}}, + volume = {24}, + number = {1}, + author = {Aysam Guerler and Dannon Baker and Marius van den Beek and Bjoern Gruening and Dave Bouvier and Nate Coraor and Stephen D. Shank and Jordan D. 
Zehr and Michael C. Schatz and Anton Nekrutenko}, + title = {Fast and accurate genome-wide predictions and structural modeling of protein{\textendash}protein interactions using {Galaxy}}, + journal = {{BMC} Bioinformatics} +} + +@article{Rasche2022, + doi = {10.1093/gigascience/giad048}, + url = {https://doi.org/10.1093/gigascience/giad048}, + year = {2023}, + month = jul, + publisher = {Oxford University Press ({OUP})}, + volume = {12}, + author = {Helena Rasche and Cameron Hyde and John Davis and Simon Gladman and Nate Coraor and Anthony Bretaudeau and Gianmauro Cuccuru and Wendi Bacon and Beatriz Serrano-Solano and Jennifer Hillman-Jackson and Saskia Hiltemann and Miaomiao Zhou and Bj{\"o}rn Gr{\"u}ning and Andrew Stubbs}, + title = {Training Infrastructure as a Service}, + journal = {{GigaScience}} +} + @article{Riesle2023, doi = {10.1038/s41467-023-41507-z}, url = {https://doi.org/10.1038/s41467-023-41507-z}, @@ -340,16 +413,6 @@ @article{rajczewski_rigorous_2021 year = {2021} } -@article{Guerler2021, - doi = {10.1101/2021.03.17.435706}, - url = {https://doi.org/10.1101/2021.03.17.435706}, - year = {2021}, - month = mar, - publisher = {Cold Spring Harbor Laboratory}, - author = {Aysam Guerler and Dannon Baker and Marius van den Beek and Bjoern Gruening and Dave Bouvier and Nate Coraor and Stephen D. Shank and Jordan D. Zehr and Michael C. Schatz and Anton Nekrutenko}, - title = {Fast and accurate genome-wide predictions and structural modeling of protein-protein interactions using Galaxy} -} - @article{Wolff2021, doi = {10.1093/bioinformatics/btab394}, url = {https://doi.org/10.1093/bioinformatics/btab394},