Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
titlesolution
collapsetrue
#set up boundaries
seq 0 250000 19750000 > lefts
seq 250000 250000 20000000 > rights
paste lefts rights > bounds.txt


#now build commands to subset the vcf with VCFtools, then pipe the subset into VCF-kit for tree building
awk '{print "vcftools --vcf chr12_for_phylo.vcf --chr chrXII --from-bp "$1 " --to-bp "$2" --recode -c | vk phylo tree upgma - > window_"$1"-"$2".newick &"}' bounds.txt > treeBuildCommands


#check the commands (should have 80 lines)
head treeBuildCommands
wc -l treeBuildCommands


#Double-check you are in an idev session
#If you are in an idev session, and had 80 lines, print to screen and execute


sh treeBuildCommands




#check that you have all trees
ls window*.newick | wc -l




#compile the resulting trees into a single file
#they need to be in order, so we can just simply cat them
#read bounds.txt again to cat them in order
echo ">allTrees.newick" >catTrees
while read bound
 do left=$(echo $bound | cut -d " " -f 1)
 right=$(echo $bound | cut -d " " -f 2)
 echo "cat window_${left}-${right}.newick | tr -d '\n' >> allTrees.newick && echo '' >>allTrees.newick" >> catTrees
done<bounds.txt


#execute the catting commands
sh catTrees


#check that the input is correct size (should be 80)
wc -l allTrees.newick


#now run Twisst on them
~/twisst/twisst.py -t allTrees.newick\
 -w topology_weights.txt\
 -g Pun_X -g Pun_Y -g Sin -g Tym\
 --groupsFile groups_file.txt\
 --method threshold\
 --thresholdTable ~/twisst/threshold_tables/binom_threshold_wilson_CI95_0.1.tsv



#This takes a bit.
#Once it's done, format the output
grep "^#" topology_weights.txt > topos.txt
grep -v "^#" topology_weights.txt > weights.txt


#send the results (all end in *.txt) to personal computer to plot with plot_twisst_results.R

...