# Placeholder
#
# Tools used by the commands in this file:
#   tabix:           http://sourceforge.net/projects/samtools/files/tabix/
#   vcftools:        http://vcftools.sourceforge.net/
#   vcf-fill-gtc.pl: https://github.com/molgenis/ngs-utils/blob/master/scripts/vcf-fill-gtc.pl
#
# Super important: always pass this option to vcf-fill-gtc.pl:
#   -ss : remove sample details!

#
# Add bgzip and tabix (tabix directory) and the vcftools bin directory to your
# environment.
# The two locations can be overridden by setting VCFTOOLS_BIN_DIR and/or
# TABIX_DIR beforehand; when unset, the original hard-coded paths are used.
#
VCFTOOLS_BIN_DIR="${VCFTOOLS_BIN_DIR:-/Volumes/Users/Software/vcftools_0.1.10/bin/}"
TABIX_DIR="${TABIX_DIR:-/Volumes/Users/Software/tabix-0.2.6/}"
# Prepend (not append) so these tool versions take precedence over any others on PATH.
export PATH="${VCFTOOLS_BIN_DIR}:${TABIX_DIR}:${PATH}"

#
# Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples
#
# Abort when the batch directory cannot be entered: everything below uses
# relative paths and the perl step edits files in place.
cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples || exit 1
# Fix missing '>' at the end of contig meta-data lines.
perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf
# Sort, filter on 'PASS', bgzip and index with tabix (vcftools will not work on uncompressed, unindexed VCF files.)
# For every input <sample>.vcf this writes <sample>.vcf.sorted.filtered.gz and indexes it.
for item in CAR_*/*.vcf; do
  # An unmatched glob is left as the literal pattern; skip it rather than
  # handing a non-existent file to the tools.
  [ -e "$item" ] || continue
  filtered="${item}.sorted.filtered"
  echo "Processing $item..."
  # Chain the steps so that compression and indexing only happen when the
  # preceding step reported success; a failing sample is reported and the
  # loop moves on to the next one.
  if ! { vcf-sort "$item" | vcf-annotate -H > "$filtered" \
    && bgzip "$filtered" \
    && tabix -p vcf "${filtered}.gz"; }; then
    echo "ERROR: failed to process $item" >&2
  fi
done

#
# Merge sample VCFs into one batch VCF.
# Uses a relative glob, so this must be run from inside the batch directory.
#
# pipefail is enabled inside a subshell only (so the option does not leak into
# the rest of the session). Without it the pipeline's status is bgzip's alone
# and a failing vcf-merge would leave a truncated merged.vcf.gz unnoticed.
if ! (set -o pipefail; vcf-merge CAR_*/*.vcf.sorted.filtered.gz | bgzip -c > merged.vcf.gz); then
  echo "ERROR: merging failed; merged.vcf.gz is incomplete or invalid." >&2
  exit 1
fi

#
# Create a summary VCF per batch:
#  -ss      : remove sample details!
#  -fv PASS : keep only high quality variant calls that pass all filters applied in NextGene.
#             Just to be sure: variants should already have been filtered on PASS only in a previous step,
#             so this should be redundant here...
#  -si      : remove all INFO subfields except for INFO:AN and INFO:AC.
#             INFO:AN and INFO:AC were automatically updated by vcf-merge,
#             but the others were not and may contain erroneous annotation
#             that cause vcf-validator to complain the created VCF is not valid.
#  -ll INFO : NOTE(review): presumably the log level -- confirm against the script's usage text.
#
# The script's stdout is captured in stripped.vcf.log. The results are only
# moved to the parent directory under the batch name when the script succeeded,
# so a failed run cannot publish a partial or missing VCF.
#
if ~pneerincx/EclipseWorkspace/ngs_scripts/vcf-fill-gtc.pl -vcfi merged.vcf.gz -vcfo stripped.vcf -ss -fv PASS -si -ll INFO > stripped.vcf.log; then
  mv stripped.vcf ../CAR_Batch1_106Samples.vcf
  mv stripped.vcf.log ../CAR_Batch1_106Samples.vcf.log
else
  echo "ERROR: vcf-fill-gtc.pl failed; see stripped.vcf.log" >&2
  exit 1
fi