Tuesday, February 21, 2012

Remove any variances that were in CEU population


#use CEU as common variance database, not dbSNP
import sys
def filter_ceu(target_file,background_file='CEU.all.ids'):
x = []
for line in file(background_file):
x.append(line.strip())
mx = {}.fromkeys(x)
for line in file(target_file):
line = line.strip()
if line.startswith('#'):
pass
#print line
else:
toks = line.split('\t')
if not mx.has_key(toks[2]):
print line
else:
pass

if __name__=='__main__':
filter_ceu(sys.argv[1])

No comments:

Post a Comment