Monday, November 14, 2011

Trac and Mercurial

#mkdir tool; cd tool

#Python-2.7.2
wget http://www.python.org/ftp/python/2.7.2/Python-2.7.2.tgz
tar -zxvf Python-2.7.2.tgz
cd Python-2.7.2
./configure --prefix=/home/u0592675/tool/python-2.7.2
make
make install


#apache
wget http://www.eng.lsu.edu/mirrors/apache//httpd/httpd-2.2.21.tar.gz
tar -zxvf httpd-2.2.21.tar.gz
cd httpd-2.2.21
./configure --prefix=/home/u0592675/tool/apache-2.2.21
make
make install

#mod_wsgi
wget http://modwsgi.googlecode.com/files/mod_wsgi-3.3.tar.gz
tar -zxvf mod_wsgi-3.3.tar.gz
cd mod_wsgi-3.3
./configure --with-apxs=/home/u0592675/tool/apache-2.2.21/bin/apxs --with-python=/home/u0592675/tool/python-2.7.2/bin/python
make
make install


#Trac
wget http://ftp.edgewall.com/pub/trac/Trac-0.12.2.tar.gz
wget http://peak.telecommunity.com/dist/ez_setup.py
~/tool/python-2.7.2/bin/python ez_setup.py



#
trac-admin /home/ying/mercurial_database/tomato initenv
trac-admin /home/ying/mercurial_database/tomato deploy /home/ying/web/tomato_trac


{'mod_wsgi.listener_port': '80',
'HTTP_COOKIE': 'trac_form_token=0b8daa13a5ec1c5f4819083c; trac_session=80547f6793f2d13cbfea5b1c; trac_form_token=a30953232455e34b94f82aa5; trac_session=d5bdc2b3269ef9a4001807dd',
'mod_wsgi.listener_host': '',
'SERVER_SOFTWARE': 'Apache/2.2.21 (Unix) mod_wsgi/3.3 Python/2.7.2+',
'SCRIPT_NAME': '/tomato',
'mod_wsgi.handler_script': '',
'SERVER_SIGNATURE': '',
'REQUEST_METHOD': 'GET',
'PATH_INFO': '/',
'SERVER_PROTOCOL': 'HTTP/1.1',
'QUERY_STRING': '',
'HTTP_ACCEPT_CHARSET': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'HTTP_USER_AGENT': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.106 Safari/535.2', 'HTTP_CONNECTION': 'keep-alive', 'SERVER_NAME': '155.100.213.215', 'REMOTE_ADDR': '155.100.213.215', 'mod_wsgi.request_handler': 'wsgi-script', 'wsgi.url_scheme': 'http', 'PATH_TRANSLATED': '/home/ying/tool/apache-2.2.21/htdocs/index.html', 'SERVER_PORT': '80', 'wsgi.multiprocess': True, 'mod_wsgi.input_chunked': '0', 'SERVER_ADDR': '192.168.247.129', 'DOCUMENT_ROOT': '/home/ying/tool/apache-2.2.21/htdocs', 'mod_wsgi.process_group': '', 'SCRIPT_FILENAME': '/home/ying/trac/cgi-bin/trac.wsgi', 'SERVER_ADMIN': 'you@example.com', 'wsgi.input': , 'HTTP_HOST': '155.100.213.215', 'wsgi.multithread': False, 'mod_wsgi.callable_object': 'application', 'REQUEST_URI': '/tomato/', 'HTTP_ACCEPT': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'wsgi.version': (1, 1), 'GATEWAY_INTERFACE': 'CGI/1.1', 'wsgi.run_once': False, 'wsgi.errors': , 'REMOTE_PORT': '6064', 'HTTP_ACCEPT_LANGUAGE': 'en-US,en;q=0.8', 'mod_wsgi.version': (3, 3), 'mod_wsgi.application_group': '',
'mod_wsgi.script_reloading': '1',
'wsgi.file_wrapper': ,
'HTTP_ACCEPT_ENCODING': 'gzip,deflate,sdch'}

[Tue Nov 08 11:15:24 2011] [error]

prepare GATK resource for pipeline

Three VCF Files for known SNPs from GATK resource

1000G_omni2.5.hg19.sites.vcf -> hg19.1000G_omni2.5.vcf
hapmap_3.3.hg19.sites.vcf -> hg19.hapmap_3.3.vcf
dbsnp_132.hg19.vcf -> hg19.dbsnp_132.vcf

Also copy corresponding ".idx" files to DATA_PATH


mv 1000G_omni2.5.hg19.sites.vcf hg19.1000G_omni2.5.vcf
mv hapmap_3.3.hg19.sites.vcf hg19.hapmap_3.3.vcf
mv dbsnp_132.hg19.vcf hg19.dbsnp_132.vcf

##### ERROR MESSAGE: Input files /scratch/local/4/hg19.dbsnp_132.vcf and reference have incompatible contigs: Order of contigs differences, which is unsafe.
##### ERROR /scratch/local/4/hg19.dbsnp_132.vcf contigs = [chrM, chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22, chrX, chrY]
##### ERROR reference contigs = [chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22, chrX, chrY, chrM]


1. get "dbsnp_132.hg19.vcf.gz" from "ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/1.2/hg19/"

2. gunzip dbsnp_132.hg19.vcf.gz

3. awk '/^chrM/ {print $0}' dbsnp_132.hg19.vcf > m.vcf

4. awk '/^chrM/ {next} {print $0}' dbsnp_132.hg19.vcf > wom.vcf

5. cat m.vcf >> wom.vcf

6. mv wom.vcf hg19.dbsnp_132.vcf

7. vcf-validator hg19.dbsnp_132.vcf
The header tag contig already exists, ignoring.
...same as above...
Warning: Duplicate entries, for example chr1:120995

8. vcf-validator dbsnp_132.hg19.vcf
The header tag contig already exists, ignoring.
...same as above...
Warning: Duplicate entries, for example chrM:16189


9. /home/u0592675/vcftools_0.1.7/bin/vcf-validator hg19.dbsnp_132.vcf

10. /home/u0592675/vcftools_0.1.7/bin/vcf-validator dbsnp_132.hg19.vcf



awk '/^chr/ {print $1}' /tomato/data/hg19.dbsnp_132.vcf | sort |uniq
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr20
chr21
chr22
chrX
chrY
chrM

awk '/^chr/ {print $1}' hg19.1000G_omni2.5.vcf | sort |uniq
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr20
chr21
chr22
chrX
chrY


awk '/^chr/ {print $1}' hg19.hapmap_3.3.vcf | sort |uniq
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr20
chr21
chr22
chrX


1.
awk '/^chrM/ {print $0}' ../res/hg19/dbsnp_132.hg19.vcf > m.vcf
awk '/^chrM/ {next} {print $0}' ../res/hg19/dbsnp_132.hg19.vcf > wom.vcf
cat m.vcf >> wom.vcf
rm m.vcf
mv wom.vcf hg19.dbsnp_132.vcf

2.
vim hg19.dbsnp_132.vcf
In the header section, delete the ##contig lines of all non-mapped contigs.

hg19.hapmap_3.3.vcf


hg push ssh://hiseq@hci-bio2.hci.utah.edu:/home/hiseq/tomato/
hg clone http://hci-bio2.hci.utah.edu:8011 tomato

Thursday, November 3, 2011

Got new genome fasta file?

#1. hg19.fasta.fai
samtools faidx hg19.fasta

#2. hg19.dict
java -Xmx2g -jar ~/tool/picard/CreateSequenceDictionary.jar R=hg19.fasta O=hg19.dict

Tuesday, November 1, 2011

Mercurial

hg push ssh://hiseq@bio2:/home/hiseq/tomato/
hg clone http://bio2:1234 tomato

prepare dbsnp132

1.
awk '/^chrM/ {print $0}' ../res/hg19/dbsnp_132.hg19.vcf > m.vcf
awk '/^chrM/ {next} {print $0}' ../res/hg19/dbsnp_132.hg19.vcf > wom.vcf
cat m.vcf >> wom.vcf
rm m.vcf
mv wom.vcf hg19.dbsnp_132.vcf

2.
vim hg19.dbsnp_132.vcf
In the header section, delete the ##contig lines of all non-mapped contigs.