# Download the human genome (hg19) from UCSC
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz
hg19EnsKwn45bpSplices.CSnovoindex.gz 5562353922 5304.674MB
hg19EnsKwn45bpSplices.CSnovoindex 7438882817 7094.271MB
>time gunzip hg19EnsKwn45bpSplices.CSnovoindex.gz
real 3m43.081s
user 0m39.760s
sys 1m9.550s
100517_7355X4_s_7_sequence.txt.gz 1755738672 1674.403MB
6686206832 2010-06-30 10:57 100517_7355X4_s_7_sequence.txt
>time gunzip 100517_7355X4_s_7_sequence.txt.gz
>time ./novoalign -d genome/hg19EnsKwn45bpSplices.CSnovoindex -f read/100517_7355X4_s_7_sequence.txt -F ILMFQ >hello.txt
#NovoalignCS
wget http://www.novocraft.com/downloads/download.php?filename=Linux/V2.0/novoalignCSV1.00.09.gcc.tar.gz
hg19EnsKnGn45bpSplices.fasta.gz
167905261 2010-08-03 05:56 hg19EnsKnGn45bpSplices.fasta.gz
>time ./novoindex hg19.nix genome/hg19.fasta
# novoindex (1.0) - Universal k-mer index constructor.
# (C) 2008 NovoCraft
# novoindex hg19.nix genome/hg19.fasta
# Creating 5 indexing threads.
# Building with 13-mer and step of 2 bp.
# novoindex construction dT = 257.0s
# Index memory size 2.383Gbyte.
# Done.
real 4m17.235s
user 2m5.360s
sys 5m50.900s
hg19.nix 2558416377
time ./novoalign -d genome/hg19.nix -f read/100517_7355X4_s_7_sequence.txt -F ILMFQ >hello.txt
>grep -c '@' 100517_7355X4_s_7_sequence.txt
29283087
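So this file has roughly 29283087 FASTQ entries (reads). Note that grep -c '@' counts every line that contains '@', and '@' is also a valid quality-score character, so quality lines can match too and this number is an upper bound. For standard 4-line FASTQ records, the exact read count is the total line count (wc -l) divided by 4.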
No comments:
Post a Comment