#how many raw snps?
$awk '!/#/ {print $0}' 7550X1.snps.raw.vcf | wc -l
3492007
#filter out known dbSNP members (rs IDs) and retain only SNPs that PASS the filters.
$awk '!/#/ {print $0}' 7550X1.snps.raw.vcf | awk '$3 !~ "rs" {print $0}' | awk '$7 ~ "PASS" {print $0}' > 7550x1.nodbsnp.pass.vcf
#how many novel SNPs?
$wc -l 7550x1.nodbsnp.pass.vcf
312776
$vcf-validator 7550x1.nodbsnp.pass.vcf
#error: no header
$head -n 22 7550X1.snps.raw.vcf > head.txt
$cat head.txt 7550x1.nodbsnp.pass.vcf > 7550x1.vcf
#have to build index!
$bgzip 7550x1.vcf
$tabix -p vcf 7550x1.vcf.gz
#the common SNPs between x1 and x4 (affected)
$vcf-isec -n =2 7550x1.vcf.gz 7550x4.vcf.gz > x1x4.vcf
$wc -l x1x4.vcf
130480 x1x4.vcf
No comments:
Post a Comment