-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add a new readme * update articles menu * update vignettes * update changelog * update readme * run styler * style vignette * turn into code * Update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Luke Zappia <[email protected]> * rename core classes to core registries * update documentation * remove duplicate content * Apply suggestions from code review Co-authored-by: Luke Zappia <[email protected]> * Suggestions from review * move helper function for generating markdown documentation from modules to a separate file * add wetlab vignette * fix formatting of class names * update changelog * style code * update readme * add links to authors --------- Co-authored-by: Luke Zappia <[email protected]>
- Loading branch information
Showing
13 changed files
with
666 additions
and
347 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ Authors@R:c( | |
person("Lamin Labs", , , "[email protected]", role = c("aut", "cph")) | ||
) | ||
Description: Interact with 'LaminDB' from R. 'LaminDB' is an open-source | ||
data framework for biology. This package allows you to query and | ||
data framework for biology. This package allows you to query and | ||
download data from 'LaminDB' instances. | ||
License: Apache License (>= 2) | ||
URL: https://laminr.lamin.ai, https://github.com/laminlabs/laminr | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#' Generate Markdown documentation for the registries of a module
#'
#' For every registry of the module that is not a link table, this emits a
#' `## <class name>` heading, the classes of the registry object (without
#' `"R6"`) as `?topic` references, a bullet list of simple fields and a bullet
#' list of relational fields. Fields whose name starts with `_` are skipped.
#'
#' @param db Object providing `get_module(name)`. The returned module must
#'   provide `name`, `get_registry_names()` and `get_registry(name)`.
#' @param module_name Name of the module to document.
#' @param allowed_related_modules Names of the modules a relational field may
#'   point to. Relational fields that point to any other module are omitted.
#'
#' @return A character vector of Markdown fragments in document order (each
#'   fragment carries its own trailing newlines), or `NULL` when the module
#'   has no registry to document.
#' @noRd
generate_module_markdown <- function(
    db,
    module_name,
    allowed_related_modules = c("core", module_name)) {
  module <- db$get_module(module_name)

  sections <- lapply(module$get_registry_names(), function(registry_name) {
    registry <- module$get_registry(registry_name)

    # Link tables are not documented, so skip them before fetching any fields.
    if (registry$is_link_table) {
      return(NULL)
    }

    module_md_registry_section(
      db, registry, module_name, allowed_related_modules
    )
  })

  # Flatten once at the end instead of growing a vector inside the loops.
  unlist(sections, use.names = FALSE)
}

# Build the complete Markdown section (heading, base classes, simple fields,
# relational fields) for a single registry.
module_md_registry_section <- function(
    db,
    registry,
    module_name,
    allowed_related_modules) {
  fields <- registry$get_fields()

  classes <- class(registry)
  class_refs <- paste0("`?", classes[classes != "R6"], "`")

  c(
    paste0("## ", registry$class_name, "\n\n"),
    paste0("Base classes: ", paste(class_refs, collapse = ", "), "\n\n"),
    module_md_simple_fields(fields),
    module_md_relational_fields(
      db, fields, module_name, allowed_related_modules
    )
  )
}

# Build the "Simple fields" subsection: public fields without a related field.
# Returns NULL when there is no such field.
module_md_simple_fields <- function(fields) {
  # Field type names mapped to the R type shown in the documentation; types
  # that are not listed here are printed unchanged.
  type_map <- c(
    "BigAutoField" = "integer64",
    "AutoField" = "integer",
    "CharField" = "character",
    "BooleanField" = "logical",
    "DateTimeField" = "POSIXct",
    "TextField" = "character",
    "ForeignKey" = "integer64",
    "BigIntegerField" = "integer64",
    "SmallIntegerField" = "integer",
    "JSONField" = "list"
  )

  simple_fields <- Filter(
    function(field) {
      is.null(field$related_field_name) &&
        !grepl("^_", field$field_name)
    },
    fields
  )

  if (length(simple_fields) == 0) {
    return(NULL)
  }

  bullets <- lapply(simple_fields, function(field) {
    field_type <-
      if (field$type %in% names(type_map)) {
        type_map[[field$type]]
      } else {
        field$type
      }
    paste0("* `", field$field_name, "` (`", field_type, "`)\n")
  })

  c("### Simple fields\n\n", unlist(bullets, use.names = FALSE), "\n\n")
}

# Build the "Relational fields" subsection: public, non-link-table fields that
# point to a registry of one of the allowed modules. Returns NULL when there
# is no such field.
module_md_relational_fields <- function(
    db,
    fields,
    module_name,
    allowed_related_modules) {
  relational_fields <- Filter(
    function(field) {
      !is.null(field$related_field_name) &&
        !grepl("^_", field$field_name) &&
        !field$is_link_table &&
        field$related_module_name %in% allowed_related_modules
    },
    fields
  )

  if (length(relational_fields) == 0) {
    return(NULL)
  }

  bullets <- lapply(relational_fields, function(field) {
    related_module <- db$get_module(field$related_module_name)
    related_registry <-
      related_module$get_registry(field$related_registry_name)

    # Registries outside "core" are shown with their module as a prefix.
    related_class_name <-
      if (related_module$name == "core") {
        related_registry$class_name
      } else {
        paste0(related_module$name, "$", related_registry$class_name)
      }

    # Same module: anchor within this page; other module: anchor on the page
    # named after that module.
    related_link <-
      if (related_module$name == module_name) {
        paste0("#", related_registry$name)
      } else {
        paste0("module_", related_module$name, ".html#", related_registry$name)
      }

    paste0(
      " * `", field$field_name, "` ([`", related_class_name, "`](",
      related_link, "))\n"
    )
  })

  c("### Relational fields\n\n", unlist(bullets, use.names = FALSE), "\n\n")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,134 +1,61 @@ | ||
# LaminR: Work with LaminDB instances in R | ||
# {laminr}: An R interface to LaminDB | ||
|
||
|
||
<!-- | ||
DO NOT edit the README.md directly. | ||
Instead, edit the README.qmd file and render it using `quarto render README.qmd`. | ||
--> | ||
<!-- badges: start --> | ||
|
||
[![R-CMD-check](https://github.com/laminlabs/laminr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/laminlabs/laminr/actions/workflows/R-CMD-check.yaml) | ||
<!-- badges: end --> | ||
|
||
This package allows you to query and download data from LaminDB | ||
instances. | ||
**{laminr}** is an R package that provides an interface to [LaminDB](https://lamin.ai), a powerful open-source data framework designed specifically for biological research. With laminr, you can leverage LaminDB's powerful features to manage, query, and track your data and metadata with unparalleled efficiency and scalability, all within the familiar comfort of R. | ||
|
||
## Why use {laminr}? | ||
|
||
LaminDB offers a unique approach to data management in bioinformatics, providing: | ||
|
||
* **Unified Data and Metadata Handling**: Organize your data and its associated metadata in a structured and accessible manner. | ||
* **Powerful Querying and Search**: Effortlessly filter and retrieve specific data and metadata using intuitive query functions. | ||
* **Data Lineage Tracking**: Maintain a comprehensive history of your data transformations, ensuring reproducibility and transparency. | ||
* **Ontology Integration**: Leverage public ontologies (e.g., for genes, proteins, cell types) for standardized metadata annotation. | ||
* **Data Validation and Standardization**: Ensure data quality and consistency with built-in validation and standardization tools. | ||
|
||
## Setup | ||
**{laminr}** brings all these benefits to your R workflow, allowing you to seamlessly integrate LaminDB into your existing analysis pipelines. | ||
|
||
Install the development version from GitHub: | ||
## Installation | ||
|
||
Get started with **{laminr}** by installing the development version directly from GitHub: | ||
|
||
``` r | ||
# install.packages("remotes") | ||
remotes::install_github("laminlabs/laminr") | ||
``` | ||
|
||
To install all suggested dependencies required for some functionality, | ||
use: | ||
To include all suggested dependencies for enhanced functionality, use: | ||
|
||
``` r | ||
remotes::install_github("laminlabs/laminr", dependencies = TRUE) | ||
``` | ||
|
||
You will also need to install `lamindb`: | ||
You will also need to install the `lamindb` Python package: | ||
|
||
``` bash | ||
pip install lamindb[aws] | ||
``` | ||
|
||
## Connect to an instance | ||
|
||
To connect to a LaminDB instance, you will first need to run | ||
`lamin login` OR `lamin load <instance>` in the terminal. This will | ||
create a directory in your home directory called `.lamin` with the | ||
necessary credentials. | ||
|
||
``` bash | ||
lamin login | ||
lamin connect laminlabs/cellxgene | ||
``` | ||
|
||
Then, you can connect to the instance using the `laminr::connect()` | ||
function: | ||
|
||
``` r | ||
library(laminr) | ||
|
||
db <- connect("laminlabs/cellxgene") | ||
db | ||
``` | ||
|
||
cellxgene | ||
Core registries | ||
$Run | ||
$User | ||
$Param | ||
$ULabel | ||
$Feature | ||
$Storage | ||
$Artifact | ||
$Transform | ||
$Collection | ||
$FeatureSet | ||
$ParamValue | ||
$FeatureValue | ||
Additional modules | ||
bionty | ||
|
||
## Query the instance | ||
|
||
You can use the `db` object to query the instance: | ||
|
||
``` r | ||
artifact <- db$Artifact$get("KBW89Mf7IGcekja2hADu") | ||
``` | ||
|
||
You can print the record: | ||
## Getting started | ||
|
||
``` r | ||
artifact | ||
``` | ||
The best way to get started with **{laminr}** is to explore the package vignettes (available at [laminr.lamin.ai](https://laminr.lamin.ai)): | ||
|
||
Artifact(uid='KBW89Mf7IGcekja2hADu', description='Myeloid compartment', key='cell-census/2024-07-01/h5ads/fe52003e-1460-4a65-a213-2bb1a508332f.h5ad', created_by_id=1, run_id=27, suffix='.h5ad', created_at='2024-07-12T12:34:10.345829+00:00', hash='SZ5tB0T4YKfiUuUkAL09ZA', _hash_type='md5-n', storage_id=2, version='2024-07-01', _accessor='AnnData', id=3659, is_latest=TRUE, _key_is_virtual=FALSE, transform_id=22, n_observations=51552, size=691757462, visibility=1, updated_at='2024-07-12T12:40:48.837026+00:00', type='dataset') | ||
|
||
Or call the `$describe()` method to get a summary: | ||
|
||
``` r | ||
artifact$describe() | ||
``` | ||
* **Getting Started**: Learn the basics and explore practical examples (`vignette("laminr", package = "laminr")`). | ||
* **Package Architecture**: Get a better understanding of how **{laminr}** works (`vignette("architecture", package = "laminr")`). | ||
* **Development Roadmap**: Explore current features and future development plans (`vignette("development", package = "laminr")`). | ||
|
||
Artifact(uid='KBW89Mf7IGcekja2hADu', description='Myeloid compartment', key='cell-census/2024-07-01/h5ads/fe52003e-1460-4a65-a213-2bb1a508332f.h5ad', created_by_id=1, run_id=27, suffix='.h5ad', created_at='2024-07-12T12:34:10.345829+00:00', hash='SZ5tB0T4YKfiUuUkAL09ZA', _hash_type='md5-n', storage_id=2, version='2024-07-01', _accessor='AnnData', id=3659, is_latest=TRUE, _key_is_virtual=FALSE, transform_id=22, n_observations=51552, size=691757462, visibility=1, updated_at='2024-07-12T12:40:48.837026+00:00', type='dataset') | ||
Provenance | ||
$storage = 's3://cellxgene-data-public' | ||
$transform = 'Census release 2024-07-01 (LTS)' | ||
$run = '2024-07-16T12:49:41.81955+00:00' | ||
$created_by = 'sunnyosun' | ||
For information on specific modules and functionalities, check out the following vignettes: | ||
|
||
## Access fields | ||
* **Core Module**: Learn about the core registries available in a LaminDB instance (`vignette("module_core", package = "laminr")`). | ||
* **Bionty Module**: Explore the bionty module for biology-related entities (`vignette("module_bionty", package = "laminr")`). | ||
|
||
## Learn more | ||
|
||
You can access its fields as follows: | ||
|
||
- `artifact$id`: 3659 | ||
- `artifact$uid`: KBW89Mf7IGcekja2hADu | ||
- `artifact$key`: | ||
cell-census/2024-07-01/h5ads/fe52003e-1460-4a65-a213-2bb1a508332f.h5ad | ||
|
||
You can also fetch related fields: | ||
|
||
- `artifact$root`: s3://cellxgene-data-public | ||
- `artifact$created_by`: sunnyosun | ||
|
||
## Load the artifact | ||
|
||
You can directly load the artifact to access its data: | ||
|
||
``` r | ||
artifact$load() | ||
``` | ||
For more information about LaminDB and its features, check out the following resources: | ||
|
||
ℹ 's3://cellxgene-data-public/cell-census/2024-07-01/h5ads/fe52003e-1460-4a65-a213-2bb1a508332f.h5ad' already exists at '/home/luke/.cache/lamindb/cellxgene-data-public/cell-census/2024-07-01/h5ads/fe52003e-1460-4a65-a213-2bb1a508332f.h5ad' | ||
* [LaminDB website](https://lamin.ai/) | ||
|
||
AnnData object with n_obs × n_vars = 51552 × 36398 | ||
obs: 'donor_id', 'Predicted_labels_CellTypist', 'Majority_voting_CellTypist', 'Manually_curated_celltype', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'suspension_type', 'tissue_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid' | ||
var: 'gene_symbols', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length' | ||
uns: 'cell_type_ontology_term_id_colors', 'citation', 'default_embedding', 'schema_reference', 'schema_version', 'sex_ontology_term_id_colors', 'title' | ||
obsm: 'X_umap' | ||
* [LaminDB documentation](https://docs.lamin.ai/) |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.