You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
The following post was exported from discuss.hail.is, a forum for asking questions about Hail which has since been deprecated.
(2024-01-08T19:16:31.454Z) mgarcia said:
I am trying to run the following component of our Variant QC script to find variants that fall outside of HWE. We are filtering variants by MAF thresholds: 0.01 for all ancestry groups, save for the European group, for which our threshold is 0.001.
defflag_hwe(ds, pval_threshold, n_participants):
''' Perform sex aware HWE tests on autosomes and chrX for hg38 '''populations=ds.aggregate_cols(hl.agg.counter(ds.sa.POP)) # produces a dict of populationifNoneinpopulations:
print("WARNING: Skipping HWE test for samples of undefined population")
delpopulations[None]
hwe_tests= {}
# Pre-queue the 3 hwe tests for each populationforpopinpopulations:
#ds_subpop=hl.agg.filter(ds.sa.POP == pop)ds_subpop=hl.variant_qc(ds.filter_cols(ds.sa.POP==pop))
ifpop=='EUR':
maf_thr=0.001elifpop=='AFR':
maf_thr=0.01elifpop=='SAS':
maf_thr=0.01elifpop=='EAS':
maf_thr=0.01ds_subpop=ds_subpop.filter_rows(ds_subpop.variant_qc.AF[1] >maf_thr)
hwe_tests['hwe{}'.format(pop)] =hl.cond(
# Filter 1) hwe{Pop}# If variant is autosomal and there are enough samples in the population, apply HWEds_subpop.locus.in_autosome() & (hl.agg.count_where(ds_subpop.sa.POP==pop) >=n_participants),
hl.agg.filter(ds_subpop.sa.POP==pop, hl.agg.hardy_weinberg_test(ds_subpop.GT)),
# If variant is not autosomal, but in X nonpar and there are enough females in the population, apply HWEhl.cond(
ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP==pop) & (ds.sa.GENDER=="Female")) >=n_participants),
hl.agg.filter((ds.sa.POP==pop) & (ds.sa.GENDER=="Female"), hl.agg.hardy_weinberg_test(ds.GT)),
hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
)
)
hwe_tests['hwe{}FemalePAR'.format(pop)] =hl.cond(
# Filter 2) hwe{Pop}Female# If this is an X par variant and there are enough females in the population, apply HWEds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP==pop) & (ds.sa.GENDER=="Female")) >=n_participants),
hl.agg.filter((ds.sa.POP==pop) & (ds.sa.GENDER=="Female"), hl.agg.hardy_weinberg_test(ds.GT)),
hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
)
hwe_tests['hwe{}MalePAR'.format(pop)] =hl.cond(
# Filter 2) hwe{Pop}Male# If this is an X par variant and there are enough males in the population, apply HWEds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP==pop) & (ds.sa.GENDER=="Male")) >=n_participants),
hl.agg.filter((ds.sa.POP==pop) & (ds.sa.GENDER=="Male"), hl.agg.hardy_weinberg_test(ds.GT)),
hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
)
hwe_tests['hwe{}FemalenonPAR'.format(pop)] =hl.cond(
# Filter 3) hwe{Pop}nonPAR# If this is an X par variant and there are enough subjects in the population, apply HWEds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP==pop) & (ds.sa.GENDER=="Female")) >=n_participants),
hl.agg.filter((ds.sa.POP==pop), hl.agg.hardy_weinberg_test(ds.GT)),
hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
)
hwe_tests['hwe{}MalenonPAR'.format(pop)] =hl.cond(
# Filter 3) hwe{Pop}nonPAR# If this is an X par variant and there are enough subjects in the population, apply HWEds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP==pop) & (ds.sa.GENDER=="Male")) >=n_participants),
hl.agg.filter((ds.sa.POP==pop), hl.agg.hardy_weinberg_test(ds.GT)),
hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
)
# execute testsds=ds.annotate_rows(
hweByPop=hl.struct(
**hwe_tests
)
)
hwe_info= {}
# Pre-queue the 3 info annotations for each population# This exports variant.hwe{}.p_value to an info field variant.info.pHWE{}forpopinpopulations:
hwe_info['pHWE{}'.format(pop)] =ds.hweByPop['hwe{}'.format(pop)].p_valuehwe_info['pHWE{}FemalePAR'.format(pop)] =ds.hweByPop['hwe{}FemalePAR'.format(pop)].p_valuehwe_info['pHWE{}MalePAR'.format(pop)] =ds.hweByPop['hwe{}MalePAR'.format(pop)].p_valuehwe_info['pHWE{}FemalenonPAR'.format(pop)] =ds.hweByPop['hwe{}FemalenonPAR'.format(pop)].p_valuehwe_info['pHWE{}MalenonPAR'.format(pop)] =ds.hweByPop['hwe{}MalenonPAR'.format(pop)].p_value# execute annotationsds=ds.annotate_rows(
info=ds.info.annotate(
**hwe_info
)
)
# Add filter flagsforpopinpopulations:
# Now flag variants based on the failed populationsds=ds.annotate_rows(
filters=hl.cond(
(ds.info['pHWE{}'.format(pop)] !=-1) & (ds.info['pHWE{}'.format(pop)] <pval_threshold),
ds.filters.add('HWE{}'.format(pop)),
ds.filters
)
)
ds=ds.annotate_rows(
filters=hl.cond(
(ds.info['pHWE{}FemalePAR'.format(pop)] !=-1) & (ds.info['pHWE{}FemalePAR'.format(pop)] <pval_threshold),
ds.filters.add('HWE{}FemalePAR'.format(pop)),
ds.filters
)
)
ds=ds.annotate_rows(
filters=hl.cond(
(ds.info['pHWE{}MalePAR'.format(pop)] !=-1) & (ds.info['pHWE{}MalePAR'.format(pop)] <pval_threshold),
ds.filters.add('HWE{}MalePAR'.format(pop)),
ds.filters
)
)
ds=ds.annotate_rows(
filters=hl.cond(
(ds.info['pHWE{}FemalenonPAR'.format(pop)] !=-1) & (ds.info['pHWE{}FemalenonPAR'.format(pop)] <pval_threshold),
ds.filters.add('HWE{}FemalenonPAR'.format(pop)),
ds.filters
)
)
ds=ds.annotate_rows(
filters=hl.cond(
(ds.info['pHWE{}MalenonPAR'.format(pop)] !=-1) & (ds.info['pHWE{}MalenonPAR'.format(pop)] <pval_threshold),
ds.filters.add('HWE{}MalenonPAR'.format(pop)),
ds.filters
)
)
returnds
I have encountered the following error which I am hoping to better understand. Does this error result from referencing of two different MatrixTables: ds_subpop and ds? Thank you in advance.
Note
The following post was exported from discuss.hail.is, a forum for asking questions about Hail which has since been deprecated.
(2024-01-08T19:16:31.454Z) mgarcia said:
I am trying to run the following component of our Variant QC script to find variants that fall outside of HWE. We are filtering variants by MAF thresholds: 0.01 for all ancestry groups, save for the European group, for which our threshold is 0.001.
I have encountered the following error which I am hoping to better understand. Does this error result from referencing of two different MatrixTables: ds_subpop and ds? Thank you in advance.
The text was updated successfully, but these errors were encountered: