In R, load the package (and dplyr for downstream data frame manipulation):
library(bedrockbio)
library(dplyr)
List available datasets:
Load a dataset:
# Every partition field of a dataset must be specified; see the docs for the
# partition columns of each dataset.
df <- load_dataset(
  "ukb_ppp.pqtls",
  ancestry = "EAS",
  protein_id = "A0FGR8",
  panel = "Neurology_II"
)
Manipulate with dplyr verbs:
# Keep the columns of interest, retain the rows with the 10 largest
# neg_log_10_p_value, and add the p-value on its natural scale.
df |>
  select(
    chromosome, position,
    effect_allele, other_allele,
    beta, neg_log_10_p_value
  ) |>
  slice_max(neg_log_10_p_value, n = 10) |>
  mutate(p_value = 10^(-neg_log_10_p_value))
In Python, load the package (and Polars for downstream data frame manipulation):
import bedrock_bio as bb
import polars as pl
List available datasets:
Load a dataset into a Polars DataFrame:
# Every partition field of a dataset must be specified; see the docs for the
# partition columns of each dataset.
df = bb.load_dataset(
    'ukb_ppp.pqtls',
    ancestry='EUR',
    protein_id='A0FGR8',
    panel='Neurology_II',
).pl()
Manipulate with Polars methods:
# Keep the columns of interest, take the 10 rows with the largest
# neg_log_10_p_value, and add the p-value on its natural scale.
(
    df
    .select(
        'chromosome',
        'position',
        'effect_allele',
        'other_allele',
        'beta',
        'neg_log_10_p_value',
    )
    .top_k(10, by='neg_log_10_p_value')
    .with_columns(
        # Name the derived column explicitly: without an alias the result
        # is not stored under 'p_value'.
        (10 ** -pl.col('neg_log_10_p_value')).alias('p_value')
    )
)