Tuesday, September 21, 2010

Simulate data for pipeline

Organism, CE6

#download
# -O pins the local file name: the URL's last path component is "download",
# so without it wget may not save the file under the name gzip expects below.
wget -O calib-36.dat.gz "http://sourceforge.net/projects/maq/files/maq-data/20080929/calib-36.dat.gz/download"

#unzip
# Replaces calib-36.dat.gz with the uncompressed calib-36.dat
gzip -d calib-36.dat.gz

#simulate reads
# Arguments as given: ce6_1.fq and ce6_2.fq (presumably the two simulated read
# files, since they are the reads aligned later), the reference ce6.fa, and the
# calib-36.dat file unpacked above.
# NOTE(review): ce6.fa is not fetched anywhere in these notes - it must already
# be present in the working directory.
maq simulate ce6_1.fq ce6_2.fq ce6.fa calib-36.dat

#make indexed genome
# Produces ce6.ndx from ce6.fa for use by novoalign.
novoindex ce6.ndx ce6.fa

#Align reads to ce6 genome
# -d names the novoindex database; -f lists the read files (two files for a
# paired-end run). The original passed -d twice, so the reads were never given
# as reads.
# grep keeps only output lines containing "chr" before writing A.ce6.nal;
# `time` reports how long the pipeline takes.
time novoalign -d ce6.ndx -f ce6_1.fq ce6_2.fq | grep chr > A.ce6.nal

real 1m43.108s
user 6m39.372s
sys 0m4.652s

#Format to MAQ
# Converts the filtered novoalign output A.ce6.nal into A.map, the input of the
# maq assemble step.
# NOTE(review): the middle "-" argument is presumably a placeholder for an
# optional input list - confirm against the novo2maq usage message.
novo2maq A.map - A.ce6.nal

#Convert the reference sequences to the binary fasta format
# This step must actually run: maq assemble below reads the reference as a .bfa
# file, and nothing else in these notes creates one. The reference for this
# pipeline is ce6.fa (the same file used for simulation and indexing).
maq fasta2bfa ce6.fa ce6.bfa

#Build the mapping assembly
# Writes the consensus to A.cns; stderr is captured in assemble.log
maq assemble A.cns ce6.bfa A.map 2>assemble.log

#Extract consensus sequences and qualities
# Reads A.cns (written by maq assemble) and redirects stdout to A.cns.fq
maq cns2fq A.cns >A.cns.fq

#Extract list of SNPs
# Reads the same A.cns and redirects stdout to A.snp
maq cns2snp A.cns >A.snp


No comments:

Post a Comment