# Configuration for the resource download/installation below.
# ROOT is the base directory; the temporary working folder and the final
# resource folder are both derived from it.
ROOT=/volume/
# Working directory that the files are downloaded into.
# NOTE(review): TMPDIR is also the conventional environment variable that
# tools such as mktemp consult; if it is already exported, child processes
# will see this value - confirm that is intended.
TMPDIR=${ROOT}/tmp
# Destination root for the installed b37 resources.
RESDIR=${ROOT}/resources/b37/
# Path on the FTP server that is appended to the host part of the wget URLs.
GSAPUBFTP_PATH=bundle/2.8/b37
# Version suffix of the BWA environment module that gets loaded.
BWAVERSION=0.7.12-goolf-1.7.20


###Create tmp directory and cd to it
# The wget calls below download into the current directory, so abort if the
# temporary directory cannot be created or entered; otherwise everything would
# be downloaded into (and processed in) whatever directory the caller was in.
mkdir -p -- "$TMPDIR" || {
  echo "ERROR: cannot create temporary directory $TMPDIR" >&2
  exit 1
}
cd -- "$TMPDIR" || {
  echo "ERROR: cannot change to temporary directory $TMPDIR" >&2
  exit 1
}


###wget resources
# Fetch one URL into the current directory. Abort the script when the download
# fails, because the checksum, unzip and move steps further down need every
# file to be present.
# $1 - URL to download
download_or_die() {
  wget "$1" || {
    echo "ERROR: download failed: $1" >&2
    exit 1
  }
}

# Bundle files: each one is downloaded together with its .md5 companion.
bundle_base_url="ftp://gsapubftp-anonymous@ftp.broadinstitute.org//$GSAPUBFTP_PATH"
for bundle_file in \
  human_g1k_v37.dict.gz \
  human_g1k_v37.fasta.gz \
  human_g1k_v37.fasta.fai.gz \
  Mills_and_1000G_gold_standard.indels.b37.vcf.gz \
  Mills_and_1000G_gold_standard.indels.b37.vcf.idx.gz \
  dbsnp_138.b37.vcf.gz \
  dbsnp_138.b37.vcf.idx.gz \
  dbsnp_138.b37.excluding_sites_after_129.vcf.gz \
  dbsnp_138.b37.excluding_sites_after_129.vcf.idx.gz \
  1000G_phase1.indels.b37.vcf.gz \
  1000G_phase1.indels.b37.vcf.idx.gz
do
  download_or_die "$bundle_base_url/$bundle_file"
  download_or_die "$bundle_base_url/$bundle_file.md5"
done

# Interval files: downloaded as-is, without .md5 companions.
intervals_base_url="https://molgenis26.target.rug.nl/downloads/intervals"
for intervals_file in \
  testKit1_baits_b37_human_g1k_v37.bed \
  testKit1_baits_b37_human_g1k_v37.interval_list \
  testKit1_exons_b37_human_g1k_v37.bed \
  testKit1_exons_b37_human_g1k_v37.interval_list \
  1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.chr1.intervals
do
  download_or_die "$intervals_base_url/$intervals_file"
done


###Generate correct md5sum files
# Rewrite every downloaded *.md5 file in the current directory as
# "<file>.md5.proper.md5" containing "<checksum>  <file name>", so that
# "md5sum -c" can be run from the download directory, then delete the
# original *.md5 file.
#
# The file name is taken from the text after the checksum with any leading
# directory path removed. If the *.md5 file holds only a checksum, the name is
# derived from the *.md5 file's own name instead.
# Files already ending in .proper.md5 (left by an earlier run) are skipped.
# Returns non-zero if a *.md5 file contains no checksum.
normalize_md5_files() {
  local md5_file md5_sum md5_path target_name
  for md5_file in *.md5; do
    # An unmatched glob stays literal; skip it and anything that is not a file.
    [ -f "$md5_file" ] || continue
    case "$md5_file" in
      *.proper.md5) continue ;;
    esac

    echo "Generating correct md5sum file for $md5_file"

    md5_sum=''
    md5_path=''
    # First field is the checksum, the remainder of the line is the path.
    # read returns non-zero when the last line has no newline; the values are
    # still assigned, so the status is deliberately ignored.
    read -r md5_sum md5_path < "$md5_file" || true
    if [ -z "$md5_sum" ]; then
      echo "ERROR: no checksum found in $md5_file" >&2
      return 1
    fi

    target_name=${md5_path##*/}
    if [ -z "$target_name" ]; then
      target_name=${md5_file%.md5}
    fi

    printf '%s  %s\n' "$md5_sum" "$target_name" > "$md5_file.proper.md5"

    rm -- "$md5_file"
  done
}

normalize_md5_files || exit 1


###Check md5sums generated files
# Run "md5sum -c" on every *.proper.md5 file in the current directory.
# All files are checked so every mismatch is reported; the function returns
# non-zero if at least one check failed, and the script then stops instead of
# unzipping and installing corrupted downloads.
verify_md5_files() {
  local md5_file failed=0
  for md5_file in *.proper.md5; do
    # An unmatched glob stays literal; skip it.
    [ -f "$md5_file" ] || continue

    echo "Checking md5sum for file $md5_file"

    if ! md5sum -c -- "$md5_file"; then
      echo "ERROR: md5sum check failed for $md5_file" >&2
      failed=1
    fi
  done
  return "$failed"
}

verify_md5_files || exit 1


###Unzip all *.gz files
# Decompress every *.gz file in the current directory in place.
# The glob is expanded by the shell (not by parsing ls output) so file names
# with whitespace are handled. Returns non-zero on the first file that gunzip
# cannot decompress, and the script then stops.
gunzip_downloads() {
  local gz_file
  for gz_file in *.gz; do
    # An unmatched glob stays literal; skip it.
    [ -f "$gz_file" ] || continue

    echo "Gunzipping file $gz_file"

    gunzip -- "$gz_file" || {
      echo "ERROR: failed to gunzip $gz_file" >&2
      return 1
    }
  done
}

gunzip_downloads || exit 1


###Index reference genome using BWA
# Make the site's EasyBuild module tree available, list the modules (output is
# informational only) and load the configured BWA version.
module use /srv/molgenis/.local/easybuild/modules/all/
module avail
module load "BWA/$BWAVERSION"

# Check for the bwa executable directly rather than relying on the exit status
# of "module load", so a missing module is reported here and not as an obscure
# failure later on.
if ! command -v bwa >/dev/null 2>&1; then
  echo "ERROR: bwa not found after loading module BWA/$BWAVERSION" >&2
  exit 1
fi

# -p sets the prefix of the generated index files, so they are written next to
# the reference as human_g1k_v37.fasta.*; -a bwtsw selects the index algorithm.
bwa index -p human_g1k_v37.fasta -a bwtsw human_g1k_v37.fasta || {
  echo "ERROR: bwa index failed for human_g1k_v37.fasta" >&2
  exit 1
}


###Create folder structure for resources
# One sub-directory per resource category below the resource root; -p also
# creates missing parents and accepts directories that already exist.
for resource_subdir in indices/ intervals/ snp/dbSNP/ snp/1000G/ sv/1000G; do
  mkdir -p "${RESDIR}/${resource_subdir}"
done


###Move all resources to correct folder
# Move files from the current directory into a resource directory.
# $1 - destination directory; remaining arguments - files to move.
# The trailing slash makes mv fail when the destination is not an existing
# directory, instead of silently renaming a file to that path. Any failure
# aborts the script so that the temporary folder, which still holds the files,
# is not removed by the cleanup step at the end.
move_resources() {
  local destination=$1
  shift
  mv -- "$@" "$destination/" || {
    echo "ERROR: could not move all files to $destination" >&2
    exit 1
  }
}

# Reference genome, its dictionary/fai index and the BWA index files.
move_resources "$RESDIR/indices" \
  human_g1k_v37.dict \
  human_g1k_v37.fasta \
  human_g1k_v37.fasta.amb \
  human_g1k_v37.fasta.ann \
  human_g1k_v37.fasta.bwt \
  human_g1k_v37.fasta.pac \
  human_g1k_v37.fasta.sa \
  human_g1k_v37.fasta.fai

# Indel call sets.
move_resources "$RESDIR/sv/1000G" \
  Mills_and_1000G_gold_standard.indels.b37.vcf \
  Mills_and_1000G_gold_standard.indels.b37.vcf.idx \
  1000G_phase1.indels.b37.vcf \
  1000G_phase1.indels.b37.vcf.idx

# dbSNP call sets.
move_resources "$RESDIR/snp/dbSNP" \
  dbsnp_138.b37.vcf \
  dbsnp_138.b37.vcf.idx \
  dbsnp_138.b37.excluding_sites_after_129.vcf \
  dbsnp_138.b37.excluding_sites_after_129.vcf.idx

# Interval files.
move_resources "$RESDIR/intervals" \
  testKit1_baits_b37_human_g1k_v37.bed \
  testKit1_baits_b37_human_g1k_v37.interval_list \
  testKit1_exons_b37_human_g1k_v37.bed \
  testKit1_exons_b37_human_g1k_v37.interval_list \
  1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.chr1.intervals

###Remove temporary folder
# Only reached when every move above succeeded. ${TMPDIR:?} stops the script
# with an error if the variable is empty or unset.
rm -r -- "${TMPDIR:?}"
