# Use CEU as the common-variant database instead of dbSNP.
import sys
def filter_ceu(target_file, background_file='CEU.all.ids'):
    """Print the records of target_file whose ID is not a background ID.

    Args:
        target_file: Path to a tab-separated text file. The third column of
            each record is looked up among the background IDs (presumably
            the variant ID column of a VCF-style file -- confirm).
        background_file: Path to a text file holding one ID per line.

    Lines starting with '#' and blank lines are dropped. Every other line is
    written to stdout, stripped of surrounding whitespace, unless its third
    tab-separated field is one of the background IDs.

    Raises:
        IndexError: If a non-blank, non-comment line has fewer than three
            tab-separated fields.
    """
    # A set gives constant-time membership tests, and the context manager
    # guarantees the handle is closed even if reading fails.
    with open(background_file) as handle:
        known_ids = set(entry.strip() for entry in handle)

    with open(target_file) as handle:
        for line in handle:
            line = line.strip()
            # Skip blank lines (they have no ID column) and header/comment lines.
            if not line or line.startswith('#'):
                continue
            if line.split('\t')[2] not in known_ids:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(line)
if __name__ == '__main__':
    # Usage: <script> TARGET_FILE [BACKGROUND_FILE]
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.stderr.write(
            'usage: %s TARGET_FILE [BACKGROUND_FILE]\n' % sys.argv[0])
        sys.exit(2)
    # Forward the optional second argument as background_file; when it is
    # absent, filter_ceu falls back to its own default.
    filter_ceu(*sys.argv[1:3])
# (Web-page residue from the original blog post; not part of the script.)
# No comments:
# Post a Comment