import random
import sys
random.seed(13115)
def iter_chromosome(fasta):
m = {}
seq = []
chr = ''
for line in file(fasta):
line = line.strip()
if line.startswith('>'):
if seq: #
yield chr,''.join(seq)
seq = []
chr = line
else:
seq.append(line)
yield chr,''.join(seq)
def make_snp(ref):
x = ['A','G','C','T']
x.remove(ref)
return random.choice(x)
def generate_SNP(fasta,interval=1250):
import numpy
exon = {}.fromkeys(('A','G','C','T'))
snp = []
fsnp=file('snp.txt','w')
for chr, seq in iter_chromosome(fasta):
dup = [s for s in seq]
N = len(dup)
num_required=N/interval
pos = [int(i) for i in numpy.random.normal(interval,interval/5,num_required)]
index = 0
for p in pos:
index = index + p
try:
ref = dup[index-1]
if exon.has_key(ref):
alt = make_snp(ref)
dup[index-1] = alt
snp.append('\t'.join((chr.lstrip('>'),str(index),ref,alt)))
except IndexError:
break
print chr
for i in xrange(0, len(dup), 50):
print ''.join(dup[i:i+50])
print >>fsnp,'\n'.join(snp)
fsnp.close()
fasta = 'hg19.fa'
generate_SNP(fasta,250)
No comments:
Post a Comment