| 35 | | Super important: |
| 36 | | # -ss : remove sample details! |
| | 35 | ''The option -ss is crucial here: it removes all sample details.'' |
| | 36 | |
| | 37 | Afterwards, be sure to inspect the log file for warnings! |
| | 38 | |
| | 39 | more stripped.vcf.log |
| | 40 | |
| | 41 | Full manual: |
| | 42 | |
| | 43 | Create a summary VCF per batch: |
| | 44 | -ss : remove sample details! |
| | 45 | -fv PASS : keep only high quality variant calls that pass all filters applied in NextGene. |
| | 46 | Just to be sure: variants should already have been filtered on PASS only in a previous step, |
| | 47 | so this should be redundant here... |
| | 48 | -si : remove all INFO subfields except for INFO:AN and INFO:AC. |
| | 49 | INFO:AN and INFO:AC were automatically updated by vcf-merge, |
| | 50 | but the others were not and may contain erroneous annotations |
| | 51 | that cause vcf-validator to complain that the created VCF is not valid. |
| | 52 | -ll : specifies log level, e.g. INFO |
| 46 | | == Troubleshooting == |
| 47 | | # |
| 48 | | # Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples |
| 49 | | # |
| 50 | | cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples |
| 51 | | # Fix missing '>' at the end of contig meta-data lines. |
| 52 | | perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf |
| 53 | | # Sort, filter on 'PASS', bgzip and index with tabix (vcftools will not work on uncompressed, unindexed VCF files.) |
| 54 | | for item in $(ls CAR_*/*.vcf); \ |
| 55 | | do echo "Processing $item..."; \ |
| 56 | | vcf-sort $item | vcf-annotate -H > $item\.sorted\.filtered; \ |
| 57 | | bgzip $item\.sorted\.filtered; \ |
| 58 | | tabix -p vcf $item\.sorted\.filtered\.gz; \ |
| 59 | | done |
| | 61 | Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples |
| | 62 | cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples |
| | 63 | Fix missing '>' at the end of contig meta-data lines. |
| | 64 | perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf |
| | 65 | Sort, filter on 'PASS', bgzip and index with tabix (vcftools will not work on uncompressed, unindexed VCF files.) |
| | 66 | for item in $(ls CAR_*/*.vcf); \ |
| | 67 | do echo "Processing $item..."; \ |
| | 68 | vcf-sort $item | vcf-annotate -H > $item\.sorted\.filtered; \ |
| | 69 | bgzip $item\.sorted\.filtered; \ |
| | 70 | tabix -p vcf $item\.sorted\.filtered\.gz; \ |
| | 71 | done |
| 61 | | # |
| 62 | | # Merge sample VCFs into one batch VCF. |
| 63 | | # |
| 64 | | vcf-merge CAR_*/*.vcf.sorted.filtered.gz | bgzip -c > merged.vcf.gz |
| 65 | | |
| 66 | | # |
| 67 | | # Create a summary VCF per batch: |
| 68 | | # -ss : remove sample details! |
| 69 | | # -fv PASS : keep only high quality variant calls that pass all filters applied in NextGene. |
| 70 | | # Just to be sure: variants should already have been filtered on PASS only in a previous step, |
| 71 | | # so this should be redundant here... |
| 72 | | # -si : remove all INFO subfields except for INFO:AN and INFO:AC. |
| 73 | | # INFO:AN and INFO:AC were automatically updated by vcf-merge, |
| 74 | | # but the others were not and may contain erroneous annotations |
| 75 | | # that cause vcf-validator to complain that the created VCF is not valid. |
| 76 | | # |
| 77 | | ~pneerincx/EclipseWorkspace/ngs_scripts/vcf-fill-gtc.pl -vcfi merged.vcf.gz -vcfo stripped.vcf -ss -fv PASS -si -ll INFO > stripped.vcf.log |
| 78 | | mv stripped.vcf ../CAR_Batch1_106Samples.vcf |
| 79 | | mv stripped.vcf.log ../CAR_Batch1_106Samples.vcf.log |