Tutorial 2: Sensitivity analyses

Set up a PHRAPL analysis
Sensitivity analyses
1. Postprocess RDA files per subset
2. Plot subsets

Set up a PHRAPL analysis

In your R terminal type:

**************************************************************************

download.file("https://raw.githubusercontent.com/ariadnamorales/phrapl-manual/master/data/exampleData/1.Subsampling_GridSearch_Post.R", destfile="1.Subsampling_GridSearch_Post.R")

system("R CMD BATCH 1.Subsampling_GridSearch_Post.R > 1.Subsampling_GridSearch_Post.R.out")

**************************************************************************

Load input files from GitHub

Do not run this script in your R terminal, it will be running in the background while we discuss what is going on:

setwd("/working_path/sensitivityAnalyses")  # <----- Change this line if your are running the tutorial step by step

## Create output dirs
	system(paste0("mkdir ", getwd(),  "/results"))
	system(paste0("mkdir ", getwd(),  "/results/RoutFiles"))

## Load libraries and functions
library(ape)
library(partitions)
library(phrapl)

## migrationArray
load(url("https://github.com/ariadnamorales/phrapl-manual/raw/master/data/sensitivityAnalyses/example/input/MigrationArray_2pop_3K.rda"))

## Input Files
currentAssign<-read.table(file="https://raw.githubusercontent.com/ariadnamorales/phrapl-manual/master/data/sensitivityAnalyses/example_with_outputFiles/input/Pleth_align.txt", head=TRUE)
currentTrees<-ape::read.tree("https://raw.githubusercontent.com/ariadnamorales/phrapl-manual/master/data/sensitivityAnalyses/example_with_outputFiles/input/Pleth_bestTree.tre")

Subsampling

########################
### 1. Subsampling  ####
########################

## Define arguments
subsamplesPerGene<-10 
nloci<-5
popAssignments<-list(c(3,3))

    
## Do subsampling
observedTrees<-PrepSubsampling(assignmentsGlobal=currentAssign,observedTrees=currentTrees,
popAssignments=popAssignments,subsamplesPerGene=subsamplesPerGene,outgroup=FALSE,outgroupPrune=FALSE)

### You will see this error message:
### Warning messages:
#		1: In PrepSubsampling(assignmentsGlobal = currentAssign, observedTrees = currentTrees,  :
#	  Warning: Tree number  1 contains tip names not included in the inputted assignment file. These tips will not be subsampled.
### This is because we exclude a few individuals from the Assignment File to reduce computational time for the sake of this tutorial.

## Get subsample weights for phrapl
subsampleWeights.df<-GetPermutationWeightsAcrossSubsamples(popAssignments=popAssignments,observedTrees=observedTrees)

## Save subsampled Trees and weights
save(list=c("observedTrees","subsampleWeights.df"),file=paste0(getwd(),"/phraplInput_Pleth.rda"))

GridSearch

######################
### 2. GridSearch  ###
######################

## Search details
modelRange<-c(1:5)
popAssignments<-list(c(3,3))  
nTrees<-100      
subsamplesPerGene<-10
totalPopVector<-list(c(4,4))     			## total number of indvs per pop
popScaling<-c(0.25, 1, 1, 1, 1)					## IMPORTANT: one of these loci is haploid

## Run search and keep track of the time
startTime<-as.numeric(Sys.time())

result<-GridSearch(modelRange=modelRange,
	migrationArray=migrationArray,         
	popAssignments=popAssignments,
	nTrees=nTrees,
	observedTree=observedTrees,
	subsampleWeights.df=subsampleWeights.df,
	print.ms.string=TRUE,
	print.results=TRUE,
	debug=TRUE,return.all=TRUE,
	collapseStarts=c(0.30,0.58,1.11,2.12,4.07),
	migrationStarts=c(0.10,0.22,0.46,1.00,2.15),
	subsamplesPerGene=subsamplesPerGene,
	totalPopVector=totalPopVector,
	popScaling=popScaling,					## IMPORTANT: one of these loci is haploid
	print.matches=TRUE)

# Grid list for Rout file
gridList<-result[[1]]

#Save the workspace and cleaning
save(list=ls(), file=paste0(getwd(),"/results/Pleth_",min(modelRange),"_",max(modelRange),".rda"))
system(paste0("mv ", getwd(), "/1.Subsampling_GridSearch_Post.Rout ", getwd(), "/results/RoutFiles/1.Subsampling_GridSearch_Post.Rout"))
system(paste0("rm ", getwd(), "/1.Subsampling_GridSearch_Post.R.out"))	

When you see these output files go to next step:

We will start from here:

Post-process

*****************************************************************

setwd("/working_path/sensitivityAnalyses") ### working Dir in previous script

########################
### 3. Post-process  ###
########################
## load libraries and custom functions
library(phrapl)
source("https://github.com/ariadnamorales/phrapl-manual/raw/master/data/sensitivityAnalyses/example/scripts/GenerateSetLoci_cum8_v3.R")

## migrationArray
load(url("https://github.com/ariadnamorales/phrapl-manual/raw/master/data/sensitivityAnalyses/example/input/MigrationArray_2pop_3K.rda"))

## If different models (from the same migrationArray file) were run separatly, the output can be concatenated.
totalData<-ConcatenateResults(rdaFilesPath=paste(getwd(),"/results/",sep=""),rdaFiles=NULL,addAICweights=TRUE,addTime.elapsed=FALSE)
modelAverages<-CalculateModelAverages(totalData, parmStartCol=9)

## Save output to a txt file
write.table(totalData, file=paste(getwd(),"/results/totalData.txt",sep=""), sep="\t", row.names=FALSE, quote=FALSE)

## Plot the best model
PlotModel(migrationArray[[3]], taxonNames=c("S","N"))
#PlotModel(migrationArray[[5]], taxonNames=c("S","N"))
#PlotModel(migrationArray[[4]], taxonNames=c("S","N"))

*****************************************************************

Plot of the ‘best’ model:

Sensitivity analyses

*****************************************************************

################################
### 4. Sensitivity analyses  ###
################################

## Load libraries
library(gplots)

## Load phrapl input (subsampled dataset)
load(paste0(getwd(),"/phraplInput_Pleth.rda"))

## Create output folders
system(paste("mkdir ", getwd(), "/results/subsets",sep=""), ignore.stderr=TRUE)

#########################################################
### Define arguments (some of these were already defined)
nloci<-5
NintervalLoci<-1
modelRange<-c(1:5)
lociRange<-c(1:5)
subsamplesPerGene<-10
popAssignments<-list(c(3,3))
collapseStarts<-c(0.3, 0.58, 1.11, 2.12, 4.07)   
migrationStarts<-c(0.1, 0.22, 0.46, 1, 2.15) 
n0multiplierStarts<-NULL
setCollapseZero<-NULL
nTrees<-100
dAIC.cutoff<-2
nEq<-100
subsampleWeightsVec=subsampleWeights.df[[1]][,1]
migrationArrayShort<-migrationArray[modelRange]

#####################################
### Load files with match vectors ###
rdaFilename<-paste(getwd(),'/results/Pleth_1_5.rda', sep="")

system(paste("grep matches -A1 ",getwd(),"/results/RoutFiles/1.Subsampling_GridSearch_Post.Rout | grep -v matches | grep 0 > ",getwd(),"/results/RoutFiles/matches_2.RunGridSearch.Rout.txt", sep=""))
RoutFilename<-read.table(file=paste(getwd(),'/results/RoutFiles/matches_2.RunGridSearch.Rout.txt', sep="") ,skip=1)

#####################################################
### Recalculate model averages in subsets of loci ###

# Command line to run function GenerateSetLoci
GenerateSetLoci(lociRange=lociRange,NintervalLoci=NintervalLoci,RoutFilename,rdaFilename,migrationArray=migrationArray,modelRange=modelRange,subsamplesPerGene=subsamplesPerGene,collapseStarts=collapseStarts,migrationStarts=migrationStarts,n0multiplierStarts=n0multiplierStarts,setCollapseZero=setCollapseZero,cumulative=TRUE,nTrees=nTrees,dAIC.cutoff=2,nEq=nEq, subsampleWeightsVec=subsampleWeightsVec)

*****************************************************************

When you see these output files go to next step:

Postprocess RDA files per subset

*****************************************************************

#########################################
### Postprocess RDA files per subset  ###

# Set paths to input and output files ---> Files created by GenerateSetLoci.
pathSubsets<-paste0(getwd(), "/results/subsets/")         
pathtotalData_subsets<-paste0(getwd(), "/results/subsets/")

# Loop to analyze output files ----> Will generate a totalData file per subset per model
# You may need to create one folder per subset and move files to each folder
for(i in 1:5){                    ## ------> Be careful to specify the total number of subsets (in this case 5)
    totalData<-list()
    totalData<-ConcatenateResults(rdaFilesPath=paste0(getwd(), "/results/subsets/subset",i,"/"),rdaFiles=NULL, migrationArray, rm.n0=TRUE, longNames = TRUE, outFile=NULL,addAICweights=TRUE,addTime.elapsed=FALSE, nonparmCols = 4)
    write.table(totalData, file=paste(pathtotalData_subsets,"totalData_subset",i,".txt",sep=""), sep="\t", row.names=FALSE)
}

*****************************************************************

When you see these output files go to next step:

Plot subsets

*****************************************************************

########################
### 5. Plot subsets  ###
########################

pathInputFiles<-paste0(getwd(), "/results/subsets/")

totalData_list<-list.files(pathInputFiles, pattern="*.txt", full.names=FALSE)

totalData_files<-c()
for(x in 1:length(totalData_list)){
    totalData_files<-append(totalData_files,list(read.table(paste(pathInputFiles,totalData_list[x],sep=""),header=TRUE, fill=TRUE, sep="\t")))
}

#Sort each dataframe by model
totalData_filesSorted<-c()
for(x in 1:length(totalData_list)){
	totalData_filesSorted<-append(totalData_filesSorted,list(totalData_files[[x]][order(totalData_files[[x]]$models),]))
}

#Create a dataframe with all models and their wAIC
model_wAIC<-data.frame("models"=c(1:5),row.names=NULL)
for(x in 1:length(totalData_list)){
    model_wAIC[,paste("Subset ",x,sep="")]<-totalData_filesSorted[[x]][,"wAIC"]
}

########################
## Define plot settings

## Row Labels
row.label<-c()
setLoci<-0
for(i in 1:length(totalData_list)){
    setLoci<-setLoci+8      ##loci interval
    row.label<-append(row.label, c(paste(setLoci, " Loci",sep="")))
}

## Row labels ---> 
row.labels <- rep("", 5)

## Col Labels
col.label<-seq(1:5)

## Color scale
bks<-c(seq(0,0.99,by=0.05))
lm.palette <- colorRampPalette(c("light yellow","orange", "tomato"), space = "rgb")

## Possition of the plot and legend
lmat <- rbind(c(0,3,3,3),c(2,1,1,1),c(0,0,0,4))
lwid <- c(0.2,1,1,1)
lhei <- c(0.3,2.1,0.5)

pdf(file=paste(getwd(),"/results/wAICinSubsets.pdf", sep=""), width=9, height=7)
heatmap.2(as.matrix(t(model_wAIC[2:length(model_wAIC)])), lmat = lmat, lhei=lhei,lwid=lwid,
	Rowv=FALSE,Colv=FALSE, dendrogram="none", trace="none",
	margins=c(2,12),
	col=lm.palette(length(bks)-1), breaks=bks,
	#colsep=1:5,
	rowsep=1:(length(model_wAIC)-1), sepcolor="white",
	#labRow=row.labels,
	#labCol=col.label,
	key = TRUE,
	#keysize =1.0,
	density.info=c("density"),
	denscol=tracecol,
	key.xlab="wAIC",
	key.par=list(mar=c(4,4,4,4)),
	main="wAIC for each model per locus")
dev.off()

*****************************************************************

With many more loci and models, you could build a plot such as this one.