Query artifacts#
Here, we'll query artifacts and inspect their metadata.
This guide can be skipped if you are only interested in how to leverage the overall collection.
import lamindb as ln
import lnschema_bionty as lb
import anndata as ad
💡 lamindb instance: testuser1/test-scrna
ln.track()
💡 notebook imports: anndata==0.9.2 lamindb==0.67.2 lnschema_bionty==0.39.0
💡 saved: Transform(uid='agayZTonayqA5zKv', name='Query artifacts', short_name='scrna3', version='1', type=notebook, updated_at=2024-01-24 13:38:03 UTC, created_by_id=1)
💡 saved: Run(uid='J6ndWVYMxXcwM6znOlkn', run_at=2024-01-24 13:38:03 UTC, transform_id=3, created_by_id=1)
Query artifacts by provenance metadata#
users = ln.User.lookup()
ln.Transform.filter(created_by=users.testuser1).search("scrna")
uid | score | |
---|---|---|
name | ||
scRNA-seq | Nv48yAceNSh85zKv | 90.0 |
Standardize and append a batch of data | ManDYgmftZ8C5zKv | 45.0 |
Query artifacts | agayZTonayqA5zKv | 36.0 |
transform = ln.Transform.filter(uid="Nv48yAceNSh85zKv").one()
ln.Artifact.filter(transform=transform).df()
uid | storage_id | key | suffix | accessor | description | version | size | hash | hash_type | n_objects | n_observations | transform_id | run_id | visibility | key_is_virtual | created_at | updated_at | created_by_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||
1 | 5rQPk6jQmbjiJEGvZIIz | 1 | scrna/conde22.h5ad | .h5ad | AnnData | Human immune cells from Conde22 | None | 57612943 | 9sXda5E7BYiVoDOQkTC0KB | sha1-fl | None | None | 1 | 1 | 1 | True | 2024-01-24 13:37:36.544637+00:00 | 2024-01-24 13:37:38.410369+00:00 | 1 |
Query artifacts by biological metadata#
assays = lb.ExperimentalFactor.lookup()
organism = lb.Organism.lookup()
cell_types = lb.CellType.lookup()
query = ln.Artifact.filter(
experimental_factors=assays.single_cell_rna_sequencing,
organism=organism.human,
cell_types=cell_types.gamma_delta_t_cell,
)
query.df()
uid | storage_id | key | suffix | accessor | description | version | size | hash | hash_type | n_objects | n_observations | transform_id | run_id | visibility | key_is_virtual | created_at | updated_at | created_by_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||
1 | 5rQPk6jQmbjiJEGvZIIz | 1 | scrna/conde22.h5ad | .h5ad | AnnData | Human immune cells from Conde22 | None | 57612943 | 9sXda5E7BYiVoDOQkTC0KB | sha1-fl | None | None | 1 | 1 | 1 | True | 2024-01-24 13:37:36.544637+00:00 | 2024-01-24 13:37:38.410369+00:00 | 1 |
Inspect artifact metadata#
query_set = ln.Artifact.filter().all()
artifact1, artifact2 = query_set[0], query_set[1]
artifact1.describe()
Artifact(uid='5rQPk6jQmbjiJEGvZIIz', key='scrna/conde22.h5ad', suffix='.h5ad', accessor='AnnData', description='Human immune cells from Conde22', size=57612943, hash='9sXda5E7BYiVoDOQkTC0KB', hash_type='sha1-fl', visibility=1, key_is_virtual=True, updated_at=2024-01-24 13:37:38 UTC)
Provenance:
🗃️ storage: Storage(uid='y7uEjJtq', root='/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna', type='local', updated_at=2024-01-24 13:37:11 UTC, created_by_id=1)
📔 transform: Transform(uid='Nv48yAceNSh85zKv', name='scRNA-seq', short_name='scrna', version='1', type='notebook', updated_at=2024-01-24 13:37:15 UTC, created_by_id=1)
👣 run: Run(uid='XDwGuZ15Ym0syDiDKnMi', run_at=2024-01-24 13:37:15 UTC, transform_id=1, created_by_id=1)
👤 created_by: User(uid='DzTjkKse', handle='testuser1', name='Test User1', updated_at=2024-01-24 13:37:11 UTC)
⬇️ input_of (core.Run): ['2024-01-24 13:37:44 UTC']
Features:
var: FeatureSet(uid='GgyyMaxOALjCcPCIm3Yq', n=36390, type='number', registry='bionty.Gene', hash='gRQGj3QB8ZsIfXA1BjiL', updated_at=2024-01-24 13:37:35 UTC, created_by_id=1)
'MIR1302-2HG', 'FAM138A', 'OR4F5', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'OR4F29', 'None', 'OR4F16', 'None', 'LINC01409', 'FAM87B', 'LINC01128', 'LINC00115', 'FAM41C', 'None', ...
obs: FeatureSet(uid='4TEcPnmG7T3dRdQymmyY', n=4, registry='core.Feature', hash='jKvG9U7UNNKJMc6t0H99', updated_at=2024-01-24 13:37:36 UTC, created_by_id=1)
🔗 cell_type (32, bionty.CellType): 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage', ...
🔗 assay (4, bionty.ExperimentalFactor): 'single-cell RNA sequencing', '10x 3' v3', '10x 5' v2', '10x 5' v1'
🔗 tissue (17, bionty.Tissue): 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow', ...
🔗 donor (12, core.ULabel): 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C', ...
Labels:
🏷️ organism (1, bionty.Organism): 'human'
🏷️ tissues (17, bionty.Tissue): 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow', ...
🏷️ cell_types (32, bionty.CellType): 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage', ...
🏷️ experimental_factors (4, bionty.ExperimentalFactor): 'single-cell RNA sequencing', '10x 3' v3', '10x 5' v2', '10x 5' v1'
🏷️ ulabels (12, core.ULabel): 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C', ...
artifact1.view_lineage()
artifact2.describe()
Artifact(uid='vg8ACw5giDiVeFdVoFMA', suffix='.h5ad', accessor='AnnData', description='10x reference adata', size=853388, hash='eKH1ljAEh7Kd81-o2H4A7w', hash_type='md5', visibility=1, key_is_virtual=True, updated_at=2024-01-24 13:37:56 UTC)
Provenance:
🗃️ storage: Storage(uid='y7uEjJtq', root='/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna', type='local', updated_at=2024-01-24 13:37:11 UTC, created_by_id=1)
📔 transform: Transform(uid='ManDYgmftZ8C5zKv', name='Standardize and append a batch of data', short_name='scrna2', version='1', type='notebook', updated_at=2024-01-24 13:37:44 UTC, created_by_id=1)
👣 run: Run(uid='NI5PiOJiQX8AWaRqFHuD', run_at=2024-01-24 13:37:44 UTC, transform_id=2, created_by_id=1)
👤 created_by: User(uid='DzTjkKse', handle='testuser1', name='Test User1', updated_at=2024-01-24 13:37:11 UTC)
Features:
var: FeatureSet(uid='TLTP90vGEuNfx6edPdB0', n=749, type='number', registry='bionty.Gene', hash='o70Gw1y_TnH190ggJ4Fw', updated_at=2024-01-24 13:37:55 UTC, created_by_id=1)
'IL18', 'NPM3', 'S100A9', 'S100A8', 'CNN2', 'ARHGAP45', 'RNF34', 'GPX4', 'S100A6', 'ADISSP', 'S100A4', 'FAM174C', 'SIT1', 'CCDC107', 'RSL1D1', 'TLN1', 'HES4', 'TNFRSF17', 'PCNA', 'RAB13', ...
obs: FeatureSet(uid='7nChLFCDOlYc5cTObHXv', n=1, registry='core.Feature', hash='xGI6LLg2yjtfmT842gIY', updated_at=2024-01-24 13:37:56 UTC, created_by_id=1)
🔗 cell_type (9, bionty.CellType): 'dendritic cell', 'B cell, CD19-positive', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-negative classical monocyte', 'CD38-positive naive B cell', 'CD4-positive, alpha-beta T cell', 'CD16-positive, CD56-dim natural killer cell, human'
external: FeatureSet(uid='kK8VDsHHaGBf4MOeBl3M', n=2, registry='core.Feature', hash='Cd6sfM0NoF0o0l1mYdrj', updated_at=2024-01-24 13:37:56 UTC, created_by_id=1)
🔗 assay (1, bionty.ExperimentalFactor): 'single-cell RNA sequencing'
🔗 organism (1, bionty.Organism): 'human'
Labels:
🏷️ organism (1, bionty.Organism): 'human'
🏷️ cell_types (9, bionty.CellType): 'dendritic cell', 'B cell, CD19-positive', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-negative classical monocyte', 'CD38-positive naive B cell', 'CD4-positive, alpha-beta T cell', 'CD16-positive, CD56-dim natural killer cell, human'
🏷️ experimental_factors (1, bionty.ExperimentalFactor): 'single-cell RNA sequencing'
artifact2.view_lineage()
Compare features#
Here we compute shared genes:
artifact1_genes = artifact1.features["var"]
artifact2_genes = artifact2.features["var"]
shared_genes = artifact1_genes & artifact2_genes
len(shared_genes)
749
shared_genes.list("symbol")[:10]
['HES4',
'TNFRSF4',
'SSU72',
'PARK7',
'RBP7',
'SRM',
'MAD2L2',
'AGTRAP',
'TNFRSF1B',
'EFHD2']
Compare cell types#
artifact1_celltypes = artifact1.cell_types.all()
artifact2_celltypes = artifact2.cell_types.all()
shared_celltypes = artifact1_celltypes & artifact2_celltypes
shared_celltypes_names = shared_celltypes.list("name")
shared_celltypes_names
['CD16-positive, CD56-dim natural killer cell, human']
Load the individual artifacts#
We could either load the artifacts into memory or access them in backed mode through `.backed()` to lazily load their content.
Let's load them into memory:
adata1 = artifact1.load()
adata2 = artifact2.load()
We can now subset the two loaded AnnData objects by shared cell types:
adata1_subset = adata1[adata1.obs["cell_type"].isin(shared_celltypes_names)]
adata2_subset = adata2[adata2.obs["cell_type"].isin(shared_celltypes_names)]