Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
titleDe novo locus generation
#navigate to the directory
cd denovo_2bRAD


#look at starting trimmed fastq files
ls *_trimmed.trimfastq
 
        sampleA.trim  sampleB.trim  sampleC.trim
 
#run uniquerOne.pl
#(this is analogous to making 'stacks' in STACKS (Fig1A Catchen et al. (2011))
#finds the unique RAD tags from each fastq
 
uniquerOne.pl sampleA_trimmed.trimfastq > sampleA.trim.uni
uniquerOne.pl sampleB_trimmed.trimfastq > sampleB.trim.uni
uniquerOne.pl sampleC_trimmed.trimfastq > sampleC.trim.uni
 
# merging uniqued files
#(Fig1B Catchen et al. (2011))
mergeUniq.pl uni minDP=2 >mydataMerged.uniq
 
#generates a merged set of unique tags:
        mergedUniqTags.fasta
 
# clustering allowing for up to 3 mismatches (-c 0.91); the most abundant sequence becomes reference
#This is equivalent to calling loci (Fig1C-D Catchen et al. (2011))
module load cd-hit
cd-hit-est -i mergedUniqTags.fasta -o cdh_alltags.fas -aL 1 -aS 1 -g 1 -c 0.91 -M 0 -T 0


#now we have called de novo loci based on the tags (70758 total)
#assemble them into an artificial reference for re-mapping and genotyping
concatFasta.pl fasta=cdh_alltags.fas num=8
 
#index the artificial reference with bowtie
module load bowtie
bowtie2-build cdh_alltags_cc.fasta cdh_alltags_cc.fasta
 
#now map the reads back to the artificial reference
 create mapping commands using a for loop
>mappingCommands
for file in *_trimmed.fastq
 do echo "bowtie2 --no-unal -x cdh_alltags_cc.fasta -U $file -S ${file/_trimmed.fastq/}.sam -p 12">>mappingCommands
done


#look at the mapping commands
cat mappingCommands


#returns:
	bowtie2 --no-unal -x cdh_alltags_cc.fasta -U sampleC_trimmed.trimfastq -S sampleC.trim.bt2.sam
sam -p 12 &
	bowtie2 --no-unal -x cdh_alltags_cc.fasta -U sampleB_trimmed.trimfastq -S sampleB.trim.bt2.sam -p 12 &
	bowtie2 --no-unal -x cdh_alltags_cc.fasta -U sampleA_trimmed.trimfastq -S sampleA.trim.bt2.sam
 
#The sam -p 12 &


#These will use bowtie to map the reads back to the clustered loci we generated from them
#copy the commands and paste them to execute
#The resulting alignment files can now be used for whichever genotyping method you prefer

Genotyping

Return to ddRAD data processing and alter the commands in the mpileup and quality filtering code blocks to genotype these samples

Code Block
titleexpected results
collapsetrue
#Raw calls should have 3 individuals and 14909 sites


#Qaulity filtering these to 'QUAL < 30' reduces to 9689 sites


#Filter for biallelic sites variants, genotyped in 75% of samples, with at least two reads covering the site:
6631 sites