Thursday, June 16, 2011

A Python script to separate SNP and indel calls in one VCF


#!/usr/bin/python
def split_snp_indel(fvcf):
    """Split one VCF file into a SNP-only file and an indel-only file.

    Header lines (those starting with '#') are copied to both outputs.
    A data record goes to the SNP file when its REF column is a single
    character and every comma-separated allele in its ALT column is a
    single character; every other record goes to the indel file.

    Outputs are written next to the input as ``<base>.snp.vcf`` and
    ``<base>.indel.vcf``, where ``<base>`` is ``fvcf`` without a trailing
    ``.vcf`` suffix (or ``fvcf`` unchanged if it has no such suffix).

    Args:
        fvcf: Path of the tab-separated VCF file to split.

    Raises:
        IndexError: If a non-blank data line has fewer than five columns.
        IOError/OSError: If the input cannot be read or outputs written.
    """
    suffix = '.vcf'
    # str.rstrip('.vcf') strips any trailing '.', 'v', 'c' or 'f' characters,
    # not the suffix, so e.g. 'calls_v.vcf' would become 'calls_'.
    if fvcf.endswith(suffix):
        basename = fvcf[:-len(suffix)]
    else:
        basename = fvcf

    # Context managers guarantee the outputs are flushed and closed.
    with open(fvcf) as fin, \
            open(basename + '.snp.vcf', 'w') as fsnp, \
            open(basename + '.indel.vcf', 'w') as findel:
        for line in fin:
            line = line.strip()
            if not line:
                # Blank lines carry no record and have no REF/ALT columns.
                continue
            if line.startswith('#'):
                fsnp.write(line + '\n')
                findel.write(line + '\n')
                continue

            toks = line.split('\t')
            ref = toks[3]
            # ALT may list several alleles separated by commas; measure each
            # allele on its own so a multi-allelic SNP such as 'A,C' is not
            # mistaken for an indel.
            alt_alleles = toks[4].split(',')
            is_snp = len(ref) == 1 and all(len(a) == 1 for a in alt_alleles)
            target = fsnp if is_snp else findel
            target.write(line + '\n')

if __name__ == '__main__':
    import sys

    # Command-line entry point: expects the input VCF path as the only
    # argument. Without this check a missing argument would surface as a
    # bare IndexError traceback instead of a usage hint.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <input.vcf>' % sys.argv[0])
    split_snp_indel(sys.argv[1])


1 comment:

  1. The most recent version of vcftools includes an option to filter indels and SNPs. Check http://www.biostars.org/post/show/48204/split-vcf-file-into-snps-and-indels/

    ReplyDelete