Skip to content

Commit fd075f7

Browse files
committed
add MHC exclusion step
1 parent 0882044 commit fd075f7

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

convert_cleansumstats_output_to_mixer_format.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@
2020
if 'B' in df.columns: df.rename(columns={'B':'BETA'}, inplace=True)
2121
if 'EAF' in df.columns: df.rename(columns={'EAF':'FRQ'}, inplace=True)
2222
print('renamed columns: ' + ' '.join(df.columns))
23+
24+
sumstats_len = len(df)
25+
df['BP'] = pd.to_numeric(df['BP'], errors='coerce')
26+
df.dropna(subset=['BP'], inplace=True)
27+
df['BP'] = df['BP'].astype(int)
28+
print(f'Drop {sumstats_len - len(df)} variants due to non-numeric or missing values in BP column')
29+
30+
# CHR may be loaded as strings (or mixed int/str) when the column also holds
# labels such as 'X'; coerce to numeric so '6' matches 6 and the MHC filter
# is not silently skipped. Non-numeric labels become NaN and never match.
idx = (pd.to_numeric(df['CHR'], errors='coerce') == 6) & (df['BP'] >= 25e6) & (df['BP'] < 35e6)
31+
print(f'drop MHC variants (chr6:25-35): {np.sum(idx)} variants removed, {np.sum(~idx)} variants retained')
32+
df = df[~idx].copy()
33+
2334
print(f'writing {fname_out}...')
2435
df.to_csv(fname_out, sep='\t', index=False)
2536
print('done.')

0 commit comments

Comments
 (0)