# Download the human genome (hg19) chromosome FASTA archive from UCSC
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz
hg19EnsKwn45bpSplices.CSnovoindex.gz 5562353922 5304.674MB
hg19EnsKwn45bpSplices.CSnovoindex 7438882817 7094.271MB  
>time gunzip hg19EnsKwn45bpSplices.CSnovoindex.gz 
real    3m43.081s
user    0m39.760s
sys     1m9.550s
100517_7355X4_s_7_sequence.txt.gz 1755738672	1674.403MB
6686206832 2010-06-30 10:57 100517_7355X4_s_7_sequence.txt
>time gunzip 100517_7355X4_s_7_sequence.txt.gz
>time ./novoalign -d genome/hg19EnsKwn45bpSplices.CSnovoindex  -f read/100517_7355X4_s_7_sequence.txt -F ILMFQ >hello.txt
# Download NovoalignCS
wget http://www.novocraft.com/downloads/download.php?filename=Linux/V2.0/novoalignCSV1.00.09.gcc.tar.gz
hg19EnsKnGn45bpSplices.fasta.gz
167905261 2010-08-03 05:56 hg19EnsKnGn45bpSplices.fasta.gz
>time ./novoindex hg19.nix genome/hg19.fasta
# novoindex (1.0) - Universal k-mer index constructor.
# (C) 2008 NovoCraft
# novoindex hg19.nix genome/hg19.fasta 
# Creating 5 indexing threads.
# Building with 13-mer and step of 2 bp.
# novoindex construction dT = 257.0s
# Index memory size   2.383Gbyte.
# Done.
real    4m17.235s
user    2m5.360s
sys     5m50.900s
hg19.nix 2558416377
time ./novoalign -d genome/hg19.nix  -f read/100517_7355X4_s_7_sequence.txt -F ILMFQ >hello.txt
>grep -c '@' 100517_7355X4_s_7_sequence.txt
29283087
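So this file has about 29283087 FASTQ entries (reads). Note that grep -c '@' counts every line containing '@', and '@' can also occur in quality strings, so this may overcount; dividing the total line count (wc -l) by 4 gives the exact number of reads.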
No comments:
Post a Comment