fitdistr(x,"chisq")
?fitdistr
fitdistr(x,"chi-squared")
fitdistr(x,"chi-squared",list(df = 9)
)
fitdistr(x,"chi-squared",list(df = 1))
fitdistr(x,"chi-squared",list(df = 2))
fitdistr(x,"chi-squared",list(df = 3))
?rchisq
fitdistr(x,"chi-squared",list(df = 3,ncp=0))
fitdistr(x,"chi-squared",list(df = 9,ncp=0))
fitdistr(x,"chi-squared",list(df = 9,ncp=0,log=FALSE))
fitdistr(x,"chi-squared",list(df = 9))
fitdistr(x[1:100],"chi-squared",list(df = 9))
fitdistr(x,"norm")
fitdistr(x,"normal")
fitdistr(x,"gamma")
hist(rgamma(10000,5.3569,220.322))
hist(rgamma(10000,5.3569,220.322),40)
100/8.6
100/5.5
hist(rnorm(1000))
hist(rnorm(1000),40)
plot(density(rnorm(1000),40))
a<-runif(100)
b<-runif(100)
c<-runif(100)
d<-runif(100)
hist(a)
hist(b)
hist(c)
hist(d)
hist(a+b+c+d)
hist(a+b+c+d,20)
e<-runif(100)
f<-runif(100)
g<-runif(100)
h<-runif(100)
hist(a+b+c+d+e+f+g+h,20)
hist(a+b+c+d+e+f+g+h,10)
cor(a,b)
cor(a,c)
cor(a,d)
cor(a,e)
cor(a,f)
cor(a,g)
cor(a,h)
hist(rnorm(5000),20)
hist(runif(100))
hist(rnorm(100))
hist(rnorm(10000))
hist(rnorm(10000),20)
a<-runif(10000)
b<-runif(10000)
c<-runif(10000)
d<-runif(10000)
hist(d,30)
hist(a+b+c+d,30)
library("multicore")
citation("multicore")
library(earthmovdist)
?emv
?emvL1
?emvdL1
?emdL1
set.seed(42)#
  x <- rnorm(100)#
  y <- x + rnorm(100)/10#
  emdL1(x, y, verbose=TRUE)
hist(x)
hist(y)
x
y
install.packages("GenAbel")
install.packages("GenABEL")
data(srdta)
library(GenABEL)
data(srdta)
male(srdta)#
snpnames(srdta)#
chromosome(srdta)#
map(srdta)
head(srdta)
attributes(srdta)
ls()
data(assoc)
library(GenABEL)#
data(ge03d2ex)#
ls()#
class(ge03d2ex)#
names(phdata(ge03d2ex))#
attach(phdata(ge03d2ex))
citation(GenABEL)
citation("GenABEL")
citation("Bioconductor")
citation()
?attach
attributes(ge03d2ex)
head(gtdata(ge03d2ex))
names(gtdata(ge03d2ex))
head(phdata(ge03d2ex))
?qtscore
descriptives.trait(ge03d2ex)#
#
# dm2 = diabetes mellitus type 2 / case-controls#
# case/controls differences in weight (obesity)#
descriptives.trait(ge03d2ex, by=dm2)#
#
descriptives.marker(ge03d2ex)#
#
# It is of note that we can see inflation of the proportion of the tests for HWE
# at particular threshold, as compared to the expected. This may indicate poor
# genotyping quality and/or genetic stratification.#
#
descriptives.marker(ge03d2ex,ids=(dm2==0))#
#
# Apparently, HWE distribution holds better in controls than in the total sample.
# Let us check whether there are indications that deviation from HWE is due
# to cases. At this stage we are only interested in HWE distribution table, and
# therefore will ask to report only table two:
#
#
descriptives.marker(ge03d2ex, ids = (dm2 == 1))[2]#
#
# for controls#
#
descriptives.marker(ge03d2ex, ids = (dm2 == 0))[2]#
#
#
#
# It seems that indeed excessive number of markers are out of HWE in cases. If no
# laboratory procedure (e.g. DNA extraction, genotyping, calling) were done for
# cases and controls separately, this may indicate possible heterogeneity specific
# for cases.#
#
s <- summary(ge03d2ex@gtdata[(dm2 == 1), ])#
s[1:10,]#
#
#
# Note that the column before the last provides P-exact we need. We can extract
# these to a separate vector by
#
#
pexcas <- s[, "Pexact"]
#
#
# and produce the chisq-chisq plot and estimate inflation factor by command estlambda(),#
# which operates with a vector of P-values or chisq:
#
#
estlambda(pexcas)#
#
# By default, this function also produces a chisq - chisq plot, at which you can see
# some extreme deviation of observed from expected. The resulting plot #
# shows extreme deviation for high values of the test. Black line of slope 1:
# expected under no inflation; Red line: fitted slope.#
# Looking at the lambda estimate, we indeed see inflation of the test statistics.
# You can repeat this test for the controls, if time permits.#
#
# We will test now for association in this raw data#
# a fast way is using the function qtscore#
#
# Help in qtscore#
?qtscore#
# Fast score test for association between a trait and genetic polymorphism#
# When formula contains covariates, the traits is analysed using GLM and later residuals #
# used when score test is computed for each of the SNPs in analysis. #
# Coefficients of regression are reported for the quantitative trait.#
# For binary traits, odds ratios (ORs) are reportted. When adjustemnt is performed, first, #
# "response" residuals are estimated after adjustment for covariates and scaled to [0,1]. #
# Reported effects are approximately equal to ORs expected in logistic regression model.#
#
# Run qtscore for our raw data#
an0 <- qtscore(dm2, ge03d2ex, trait = "binomial")#
#
# extract the inflation parameter#
lambda(an0)#
#
plot(an0,col="black")#
add.plot(an0, df = "Pc1df", col = "green")#
#
descriptives.scan(an0)#
#
#
an0.e <- emp.qtscore(dm2, ge03d2ex)#
descriptives.scan(an0.e, sort = "Pc1df")
# How we test the significance of the obtained values?
# We can resample our data to obtain an empirical distribution
# of p-values

# help with emp.qtscore() function 
?emp.qtscore

# In the analysis of empirical significance, first time the function qtscore is called
# and result object is saved. Later, the function qtscore is called times times with
# replace=FALSE in order to generate distribution under the null. Each call, minimal P
# value is extracted and compared with original P-values. For a particular SNP, empirical
# P-value is obtained as a proportion of times minimal Ps from resampled data was less
# then original P.

# The list elements effB, effAB and effBB are the ones obtained from the analysis of the
#original (not permuted) data set
an0.e <- emp.qtscore(dm2, ge03d2ex)
descriptives.scan(an0.e, sort = "Pc1df")
qtscore
?gwaa.data
?gwaa.data-class
gwaa.data
??gwaa.data
qc1<-check.marker(ge03d2ex, p.level=0)
summary(qc1)
attributes(qc1)
qc1$idok
head(qc1$snpok)
data1<-ge03d2ex[qc1$idok, qc1$snpok]
s1<-summary(data1@gtdata[(data1@phdata$dm2 == 1),]) 
pexcas1<-s1[,"Pexact"] 
estlambda(pexcas1)
?check.marker
qc1<-check.marker(ge03d2ex, p.level=0)

# This function helps selecting the marker which should enter into GWA analysis based on
# call rate, minor allele frequency, value of the chi-square test for Hardy-Weinberg 
# equilibrium, and redundance, defined as concordance between the distributions of the 
# genotypes (including missing values).

summary(qc1)
attributes(qc1)
qc1$idok
head(qc1$snpok)

# subset the individuals and snps OK
data1<-ge03d2ex[qc1$idok, qc1$snpok]

# recalculate the inflation parameter for new dataset
s1<-summary(data1@gtdata[(data1@phdata$dm2 == 1),]) 
pexcas1<-s1[,"Pexact"] 
estlambda(pexcas1)
?ibs
# Details
# This function facilitates quality control of genomic data. e.g. people with exteremly
# high (close to 1) IBS may indicate duplicated samples (or twins), simply high values 
# of IBS may indicate relatives.

# matrix of genomic kindship between all pairs of individuals 
data1.gkin <-ibs(data1[,data1@gtdata@chromosome != "X"], weight="freq") 

# distance matrix 
data1.dist<-as.dist(0.5-data1.gkin) 

# use classical multidimensional scaling 
data1.mds<-cmdscale(data1.dist) 

# plot the two first components 
plot(data1.mds)
km<-kmeans(data1.mds, centers=2, nstart=1000) 
cl1<-names(which(km$cluster==1)) 
cl2<-names(which(km$cluster==2)) 
data2<-data1[cl1,]

qc2<-check.marker(data2, hweids=(data2@phdata$dm2 ==0), fdr=0.2) 
summary(qc2)
cl1
cl2
####		REMOVING INDIVIDUALS			#####
km<-kmeans(data1.mds, centers=2, nstart=1000) #
cl1<-names(which(km$cluster==1)) #
cl2<-names(which(km$cluster==2)) #
cl2#
#
data2<-data1[cl1,]#
#
qc2<-check.marker(data2, hweids=(data2@phdata$dm2 ==0), fdr=0.2) #
summary(qc2)#
#################################################
#
####		GWA SCAN (QC)					#####
data2<-data2[qc2$idok, qc2$snpok] #
an1<-qtscore(dm2, data2, trait="binomial") #
plot(an1)
# add corrected p-values #
add.plot(an1, df="Pc1df", col="green")
# plot the result of the last analysis. Compare with previous results. 
plot(an1,, col="green") 

# add the previous analysis
add.plot(an0, col="red")

# compare the descriptive stats from both analyses
descriptives.scan(an1)
descriptives.scan(an0)

# select which SNPs between the best10 in previous analysis are now in the best10
rownames(descriptives.scan(an0))[rownames(descriptives.scan(an0))%in%rownames(descriptives.scan(an1))]
rownames(descriptives.scan(an0))[!rownames(descriptives.scan(an0))%in%rownames(descriptives.scan(an1))]
beta <- c(0.16, 0.091, 0.072, -0.03)
s <- c(0.07, 0.042, 0.048, 0.12)
s2 <- s * s
s2
#
#
# Compute the weight for individual studies as
w <- 1/s2
w
#
#
# Estimate pooled regression coefficient as
pbeta <- sum(w * beta)/sum(w)
pbeta

#
# and pooled squared standard error as
ps2 <- 1/sum(w)
ps2
#
#
# To access significance of association in meta-analysis, let us compute chisq test
# statistics and the P-value with
pchi2 <- pbeta * pbeta/ps2
pchi2
#
#
ppvalue <- 1 - pchisq(pchi2, 1)
ppvalue
n<-c(225,560,437,89)#
cor(n,s2)
n<-c(225,560,437,89)#
cor(n,w)
# Load the package
library(GenABEL)
# Load sample data
data(ge03d2ex)

# list available objects
ls()
# which is the class of our data object?
class(ge03d2ex)

# what inside the phenotypic part of the data?
names(phdata(ge03d2ex))
head(phdata(ge03d2ex))

attach(phdata(ge03d2ex))
phdata(ge03d2ex)
gtdata(ge03d2ex)[1:10,1:10]
gtdata(ge03d2ex)
descriptives.trait(ge03d2ex)
# dm2 = diabetes mellitus type 2 / case-controls
# case/controls differences in weight (obesity)
descriptives.trait(ge03d2ex, by=dm2)
# dm2 = diabetes mellitus type 2 / case-controls
# case/controls differences in weight (obesity)
descriptives.trait(ge03d2ex, by=sex)
descriptives.marker(ge03d2ex)
?descriptives.marker
descriptives.marker(ge03d2ex,ids=(dm2==0))
descriptives.marker(ge03d2ex,ids=(dm2==1))
# Apparently, HWE distribution holds better in controls than in the total sample.
# Let us check whether there are indications that deviation from HWE is due
# to cases. At this stage we are only interested in HWE distribution table, and
# therefore will ask to report only table two:

descriptives.marker(ge03d2ex, ids = (dm2 == 1))[2]

# for controls

descriptives.marker(ge03d2ex, ids = (dm2 == 0))[2]
s <- summary(ge03d2ex@gtdata[(dm2 == 1), ])
s[1:10,]


# Note that the column before the last provides P-exact we need. We can extract
# these to a separate vector by


pexcas <- s[, "Pexact"]


# and produce the chisq-chisq plot and estimate inflation factor by command estlambda(),
# which operates with a vector of P-values or chisq:


estlambda(pexcas)
an0 <- qtscore(dm2, ge03d2ex, trait = "binomial")
head(an0)
descriptives.scan(an0)
lambda(an0)

plot(an0,col="black")
add.plot(an0, df = "Pc1df", col = "green")
an0.e <- emp.qtscore(dm2, ge03d2ex)
descriptives.scan(an0.e, sort = "Pc1df")
qc1<-check.marker(ge03d2ex, p.level=0)
summary(qc1)
attributes(qc1)
qc1$idok
head(qc1$snpok)
data1<-ge03d2ex[qc1$idok, qc1$snpok]

# recalculate the inflation parameter for new dataset
s1<-summary(data1@gtdata[(data1@phdata$dm2 == 1),]) 
pexcas1<-s1[,"Pexact"] 
estlambda(pexcas1)
data1.gkin <-ibs(data1[,data1@gtdata@chromosome != "X"], weight="freq")
# distance matrix 
data1.dist<-as.dist(0.5-data1.gkin)
# use classical multidimensional scaling 
data1.mds<-cmdscale(data1.dist)
# plot the two first components 
plot(data1.mds)
km<-kmeans(data1.mds, centers=2, nstart=1000) 
cl1<-names(which(km$cluster==1)) 
cl2<-names(which(km$cluster==2))
cl1
cl2
# Subset the data, discarding “strange” individuals (in cluster2) 
data2<-data1[cl1,]

qc2<-check.marker(data2, hweids=(data2@phdata$dm2 ==0), fdr=0.2) 
summary(qc2)
# Subset the data to individuals and SNP that passed last check
data2<-data2[qc2$idok, qc2$snpok]

# Perform the test for association in this data 
an1<-qtscore(dm2, data2, trait="binomial") 

# Plot the results
plot(an1)

# add corrected p-values 
add.plot(an1, df="Pc1df", col="green")
# Subset the data to individuals and SNP that passed last check
data2<-data2[qc2$idok, qc2$snpok]

# Perform the test for association in this data 
an1<-qtscore(dm2, data2, trait="binomial") 

# Plot the results
plot(an0)

# add corrected p-values 
add.plot(an1, df="Pc1df", col="green")
?read.ftable
require("ape")#
require("seqinr")#
require("MCMCglmm")#
data("aaindex")#
require("methods")#
#
setOldClass("phylo","pyhlo")#
#
#
# CLASS MCMCglmm.list#
setClass("MCMCglmm.list",representation("list"),contains="list",prototype(list()))#
#
# initialize#
setMethod("initialize","MCMCglmm.list",function(.Object,b) {#
		if (sum(unlist(lapply(b,function(x)return(is(x)=="MCMCglmm"))))==length(b)) {#
			.Object@.Data<-b#
		} else {#
			.Object@.Data<-b[unlist(lapply(b,function(x)return(is(x)=="MCMCglmm")))]#
			warning(paste("length of MCMCglmm.list:",sum(unlist(lapply(b,function(x)return(is(x)=="MCMCglmm")))),", length of list:",length(b)))#
		}	#
	.Object#
}#
)#
#
#
# CLASS APPT.list (Alignment Phenotype Properties Tree)#
setClass("APPT.list",representation(alignment="matrix",pheno="data.frame",properties="list",tree="phylo",columns="numeric",multiMCMCglmm.list="list"))#
#
#
#
setMethod("initialize","APPT.list",function(.Object,alignment,pheno,properties,tree,columns=vector("numeric"),multiMCMCglmm.list=list(),nowarns=FALSE) {#
	#
	if (length(tree$tip.label)==dim(alignment)[1]&length(tree$tip.label)==dim(pheno)[1]) {#
		.Object@alignment<-alignment#
		.Object@pheno<-pheno#
		.Object@properties<-properties#
		.Object@tree<-tree	#
		if (length(columns)<1) {.Object@columns<-1:dim(alignment)[2]} else {.Object@columns<-columns}							#
		.Object@multiMCMCglmm.list<-multiMCMCglmm.list#
#
	}  else {#
		if (nowarns==FALSE) warning (paste("Species in alignment:",dim(alignment)[1],", phenotype table:",dim(pheno)[1],"or tree:",length(tree$tip.label)," differs\nEmpty object!!"))	#
		#
	}	#
	.Object#
}#
)#
#
#
#
#
# FUNCTIONS FOR RETIREVING SLOTS VALUES#
# 1- alignment#
setGeneric("alignment",function(value)standardGeneric("alignment"))#
#
setMethod("alignment","APPT.list",function(value){#
		x<-value@alignment#
		x#
}#
)#
#
#
# 2- pheno#
setGeneric("pheno",function(value)standardGeneric("pheno"))#
#
setMethod("pheno","APPT.list",function(value){#
		x<-value@pheno#
		x#
}#
)#
#
# 3- properties#
setGeneric("properties",function(value)standardGeneric("properties"))#
#
setMethod("properties","APPT.list",function(value){#
		x<-value@properties#
		x#
}#
)#
#
# 4- tree#
setGeneric("tree",function(value)standardGeneric("tree"))#
#
setMethod("tree","APPT.list",function(value){#
		x<-value@tree#
		x#
}#
)#
#
# 5- columns#
setGeneric("columns",function(value)standardGeneric("columns"))#
#
setMethod("columns","APPT.list",function(value){#
		x<-value@columns#
		x#
}#
)#
#
# 6- multiMCMCglmm#
setGeneric("multiMCMCglmm",function(value)standardGeneric("multiMCMCglmm"))#
#
setMethod("multiMCMCglmm","APPT.list",function(value){#
		x<-value@multiMCMCglmm.list#
		x#
}#
)#
#
# 7- levelsMCMCglmm#
setGeneric("levelsMCMCglmm",function(value)standardGeneric("levelsMCMCglmm"))#
#
setMethod("levelsMCMCglmm","APPT.list",function(value){#
		x<-colnames(multiMCMCglmm(value)[[1]][[1]]$Sol)#
		x#
}#
)#
#
#
#
# length#
setMethod("length", c("APPT.list"), function(x){#
	if (length(tree(x)$tip.label)==dim(pheno(x))[1]&&length(tree(x)$tip.label)==dim(alignment(x))[1]) {#
		length(tree(x)$tip.label)	#
	} else {#
		print(paste("Error: species in alignment:",dim(alignment)[1],", phenotype table:",dim(pheno)[1],"or tree:",length(tree$tip.label)," differs"))#
	}#
	}#
)#
#
setGeneric("subset.MCMCglmm.list",function(x,value)standardGeneric("subset.MCMCglmm.list"))#
setMethod("subset.MCMCglmm.list","MCMCglmm.list",function(x,value){#
		x@.Data<-x@.Data[value]#
		x#
}#
)#
#
#
#
# subset function for APPT.list: #
# WARNING this function subset only columns and multiMCMCglmm.list slots  #
# getGeneric("[")#
setMethod("[", c("APPT.list", "numeric","ANY", "ANY"),#
          function(x, i,..., drop=TRUE)#
{#
	new.multiMCMCglmm<-lapply(multiMCMCglmm(x),function(z) return(z[i]))	#
   	y<-new("APPT.list",alignment=alignment(x),pheno=pheno(x),properties=properties(x),tree=tree(x),columns=columns(x)[i],multiMCMCglmm.list=new.multiMCMCglmm)#
	y	#
})#
#
setMethod("[", c("APPT.list", "logical","ANY", "ANY"),#
          function(x, i,..., drop=TRUE)#
{#
	new.multiMCMCglmm<-lapply(multiMCMCglmm(x),function(z) return(z[i]))	#
   	y<-new("APPT.list",alignment=alignment(x),pheno=pheno(x),properties=properties(x),tree=tree(x),columns=columns(x)[i],multiMCMCglmm.list=new.multiMCMCglmm)#
	y#
})#
#
#
#
#
# cer.APPT.list#
setGeneric("cer.APPT.list",function(x,class.var,which.columns,maxngaps,nummin,...) standardGeneric("cer.APPT.list"))#
setMethod("cer.APPT.list", c("APPT.list","character","ANY","ANY","ANY"),#
          function(x, class.var,which.columns=NULL,maxngaps=0,nummin=2,...)#
{#
	#
	####	CONDITIONAL ENTROPY REDUCTION		#####
	prop<-properties(x)#
	if (is.null(which.columns)){which.columns<-columns(x)}#
#
	ma.red<-alignment(x)[,which.columns]#
	#
	col2anal<-which(unlist(lapply(apply(ma.red,2,table),length))>=nummin & apply(ma.red,2,function(x)return(sum(x=="-")))<=maxngaps)#
#
	####	CONDITIONAL ENTROPY	#####
	ce<-function(x,y){#
		t1<-table(y,x)/length(x)#
		px<-apply(t1,2,sum)#
		return( -sum(t(log(t(t1)/px,2))*t1,na.rm=TRUE))#
	}#
	#
	vect.ce<-vector("numeric")#
	hy<-vector("numeric")#
	for(i in 1:length(col2anal)){#
		vect.ce[i]<-ce(pheno(x)[,class.var],toupper(ma[,col2anal[i]]))#
		hy[i]<- -sum((table(ma[,col2anal[i]])/dim(ma)[1])*(log(table(ma[,col2anal[i]])/dim(ma)[1],2)))#
	}#
	#
	reduct.H<- 100*(1-vect.ce/hy)#
	names(reduct.H)<-colnames(ma.red[,col2anal])#
	reduct.H		#
})#
#
#
#
#
# anova.APPT.list#
setGeneric("anova.APPT.list",function(x,class.var,which.columns,maxngaps,nummin,...) standardGeneric("anova.APPT.list"))#
setMethod("anova.APPT.list", c("APPT.list","formula","ANY","ANY","ANY"),#
          function(x, class.var,which.columns=NULL,maxngaps=0,nummin=2,...)#
{#
	#
	####	ANOVA				#####
	prop<-properties(x)#
	if (is.null(which.columns)){which.columns<-columns(x)}#
#
	ma.red<-alignment(x)[,which.columns]#
	#
	col2anal<-which(unlist(lapply(apply(ma.red,2,table),length))>=nummin & apply(ma.red,2,function(x)return(sum(x=="-")))<=maxngaps)#
	tab.anova<-matrix(0,length(col2anal),length(prop))#
	#
	for (m in 1:length(prop)) {#
	#
		listaDep<-lapply(col2anal,function(z) prop[[m]][aaa(toupper(ma.red[,z]))])#
		#
		for (i in 1:length(listaDep)) {#
		      Y<-data.frame(DV=listaDep[[i]],pheno(x))#
		      modelD<-lm(formula(paste("DV ~", as.character(class.var)[2])),data=Y)#
		      tab.anova[i,m]<-summary(modelD)$coefficients[2,4]#
		}#
	}#
	colnames(tab.anova)<-names(properties(x))#
	rownames(tab.anova)<-names(listaDep)#
	#
	tab.anova<-as.data.frame(tab.anova)#
	tab.anova$cuantos<-apply(tab.anova,1,function(x)return(sum(x<1e-2)))#
	tab.anova$median<-apply(tab.anova[,1:length(prop)],1,function(x)return(median(-log(x,10))))#
	tab.anova$mean<-apply(tab.anova[,1:length(prop)],1,function(x)return(mean(-log(x,10))))#
#
	tab.anova#
			#
})#
#
#
#
#
# MCMCglmm.APPT.list#
setGeneric("MCMCglmm.APPT.list",function(x,class.var,random.eff,nitt,burnin,prior,scale,parallel,which.columns,maxngaps,nummin,count,...) standardGeneric("MCMCglmm.APPT.list"))#
setMethod("MCMCglmm.APPT.list", c("APPT.list","formula","character","numeric","numeric","list","logical","logical","ANY","ANY","ANY","ANY"),#
          function(x, class.var,random.eff,nitt=1e5,burnin=5e4,prior,scale=FALSE,parallel=FALSE,which.columns=NULL,maxngaps=0,nummin=2,count=4,...)#
{#
#
	prop<-properties(x)#
	if (is.null(which.columns)){which.columns<-columns(x)}#
#
	ma.red<-alignment(x)[,which.columns]#
	#
	col2anal<-which(unlist(lapply(apply(ma.red,2,table),length))>=nummin & apply(ma.red,2,function(x)return(sum(x=="-")))<=maxngaps)#
	tab.anova<-matrix(0,length(col2anal),length(prop))#
	#
	pheno<-pheno(x)#
	tree<-tree(x)#
	#
	toto<-list()#
	for (m in 1:length(prop)) {#
	#
		listaDep<-lapply(col2anal,function(z) prop[[m]][aaa(toupper(ma.red[,z]))])#
		#
		titi<-list()#
		testObject <- function(object){exists(as.character(substitute(object)))}#
		#
		if (parallel==TRUE) {#
			#
			suppressMessages(require(doMPI))#
			cl <- startMPIcluster(count=count,comm=7)#
			registerDoMPI(cl)#
						#
			titi<-foreach(n=1:length(listaDep),.packages=c('ape','MCMCglmm','seqinr'))%dopar%{#
				Y<-data.frame(DV=listaDep[[n]],pheno)#
			    Y$animal<-Y[,random.eff]#
			    MCMCglmm(formula(paste("DV ~", as.character(class.var)[2])),data=Y,random=~animal,family="gaussian",rcov=~units,pedigree=tree,nitt=nitt,prior=prior,scale=FALSE,burnin=burnin,verbose=FALSE)#
			      #
			}#
#
		} else {#
			#
			for (i in 1:length(listaDep)) {#
			      Y<-data.frame(DV=listaDep[[i]],pheno(x))#
			      Y$animal<-Y[,random.eff]#
			      modelD<-MCMCglmm(formula(paste("DV ~", as.character(class.var)[2])),data=Y,random=~animal,family="gaussian",rcov=~units,pedigree=tree(x),nitt=nitt,prior=prior,scale=FALSE,burnin=burnin,verbose=FALSE)#
			      titi[[i]]<-modelD#
			}#
		}#
				#
		#
		#
		names(titi)<-names(listaDep)#
		x@columns<-as.numeric(names(listaDep))#
		x@multiMCMCglmm.list[[m]]<-titi#
	}#
	names(x@multiMCMCglmm.list)<-names(properties(x))#
	x			#
})#
#
#
# summary.APPT.list#
setGeneric("summary.APPT.list",function(x,contrast,class.var,...) standardGeneric("summary.APPT.list"))#
setMethod("summary.APPT.list", c("APPT.list","character","character"),#
          function(x,contrast,class.var,...)#
{#
	#
	#
	tabla<-matrix(0,length(multiMCMCglmm(x)[[1]]),length(multiMCMCglmm(x)))#
	tabSize<-matrix(0,length(multiMCMCglmm(x)[[1]]),length(multiMCMCglmm(x)))#
	lrange<-lapply(properties(x),function(z) range(z)[2]-range(z)[1])#
	toto<-multiMCMCglmm(x)#
	prop<-names(toto)#
	#
	# gt0: tabla#
	# Effect Size:	tabSize#
	for (m in 1:length(prop)) {#
		for (i in 1:length(multiMCMCglmm(x)[[1]])) {#
			tabla[i,m]<-sum((toto[[m]][[i]]$Sol[,contrast[1]]-toto[[m]][[i]]$Sol[,contrast[2]])>0)/length(toto[[m]][[i]]$Sol[,contrast[2]])	#
			#
			tabSize[i,m]<-median(toto[[m]][[i]]$Sol[,contrast[1]]-toto[[m]][[i]]$Sol[,contrast[2]])/lrange[[m]]	#
		}#
	}#
	#
	colquevan<-columns(x)#
	#
	colnames(tabla)<-prop#
	rownames(tabla)<-colquevan#
	tabcor<-2*(tabla-0.5)#
#
#
	colnames(tabSize)<-prop#
	rownames(tabSize)<-colquevan#
#
	#
	resultado<-apply(tabcor^2,1,sum)/dim(tabcor)[2]#
	names(resultado)<-rownames(tabcor)#
	#
#
	liAAs<-list()#
	for (i in 1:length(colquevan)) { 	#
		mdf<-data.frame(seq=row.names(alignment(x)),pos=alignment(x)[,colquevan[i]],patho=pheno(x)[,class.var])#
		liAAs[[i]]<-table(mdf$patho,mdf$pos)/apply(table(mdf$patho,mdf$pos),1,sum)	#
	}#
	names(liAAs)<-names(resultado)#
	rm(toto)#
#
	out<-list()#
	out[[1]]<-tabla#
	out[[2]]<-tabSize#
	out[[3]]<-resultado#
	out[[4]]<-liAAs#
	out#
	#
})
aaindex
propiedades<-lapply(aaindex,function(x) return(x$D))
length(propiedades)
cuales<-grep("Kyte",propiedades,ignore.case=TRUE)
sum(cuales)
cuales
aaindex[cuales]
library(LUCIA)
?MCMCglmm.APPT.list
?MCMCglmm
library(LUCIA)
?MCMCglmm.APPT.list
library(LUCIA)
data("rpoS")
labels
tree7
plot(tree7)
rpoS
?multiMCMCglmm
library(LUCIA)
data("rpoS")
?rpoS
?rpoSalign
library(LUCIA)
data("rpoS")
?rpoS
data("aaindex")
aaindex[c("LEVM780101","LEVM780102","CHOC760102","JOND920102","KYTJ820101")]
library(lucia)
library(LUCIA)
?APPT.list
?APPT.list-class
data("rpoS")#
data("aaindex")#
#
# select the properties to consider from "aaindex"#
prop<-c("JOND920102","KYTJ820101")#
#
# instantiate the APPT.list object#
myAPPT.list<-new("APPT.list",alignment=rpoSalign,pheno=labels,properties=lapply(aaindex[prop],function(x) x$I),tree=tree7)#
colu<-c(376)
myAPPT.list<-MCMCglmm.APPT.list(myAPPT.list, ~ -1+pathogenicity,random.eff="spKEGG",nitt=1.5e2,burnin=5e1,prior,scale=FALSE,parallel=TRUE,which.columns=colu,maxngaps=10,nummin=2,count=4,pr=FALSE)
prior<-list(R=list(V=1, nu=1), G=list(G1=list(V=1, nu=1)))#
myAPPT.list<-MCMCglmm.APPT.list(myAPPT.list, ~ -1+pathogenicity,random.eff="spKEGG",nitt=1.5e2,burnin=5e1,prior,scale=FALSE,parallel=TRUE,which.columns=colu,maxngaps=10,nummin=2,count=4,pr=FALSE)
colu<-c(374,376)
prior<-list(R=list(V=1, nu=1), G=list(G1=list(V=1, nu=1)))#
myAPPT.list<-MCMCglmm.APPT.list(myAPPT.list, ~ -1+pathogenicity,random.eff="spKEGG",nitt=1.5e2,burnin=5e1,prior,scale=FALSE,parallel=TRUE,which.columns=colu,maxngaps=10,nummin=2,count=4,pr=FALSE)
colu<-c(376)
class(colu)
length(colu)
prior<-list(R=list(V=1, nu=1), G=list(G1=list(V=1, nu=1)))#
myAPPT.list<-MCMCglmm.APPT.list(myAPPT.list, ~ -1+pathogenicity,random.eff="spKEGG",nitt=1.5e2,burnin=5e1,prior,scale=FALSE,parallel=TRUE,which.columns=as.vector(colu),maxngaps=10,nummin=2,count=4,pr=FALSE)
colu<-c(374)
prior<-list(R=list(V=1, nu=1), G=list(G1=list(V=1, nu=1)))#
myAPPT.list<-MCMCglmm.APPT.list(myAPPT.list, ~ -1+pathogenicity,random.eff="spKEGG",nitt=1.5e2,burnin=5e1,prior,scale=FALSE,parallel=TRUE,which.columns=as.vector(colu),maxngaps=10,nummin=2,count=4,pr=FALSE)
library(LUCIA)
?summaryAPPT.list
?MCMCglmm.APPT.list
data("rpoS")#
data("aaindex")#
#
# select the properties to consider from "aaindex"#
prop<-c("JOND920102","KYTJ820101")#
#
# instantiate the APPT.list object#
myAPPT.list<-new("APPT.list",alignment=rpoSalign,pheno=labels,properties=lapply(aaindex[prop],function(x) x$I),tree=tree7)#
#
# calculate ANOVA: fast approach#
my.anova<-anova.APPT.list(myAPPT.list,class.var=~pathogenicity,which.columns=NULL,nummin=2,maxngaps=10)#
sum(my.anova$cuantos==4,na.rm=TRUE)#
which(my.anova$cuantos==4)
my.anova<-anovaAPPT.list(myAPPT.list,class.var=~pathogenicity,which.columns=NULL,nummin=2,maxngaps=10)#
sum(my.anova$cuantos==4,na.rm=TRUE)#
which(my.anova$cuantos==4)
my.cer<-cer.APPT.list(myAPPT.list,nummin=2,maxngaps=10)
my.cer<-cer.APPT.list(myAPPT.list)
my.cer<-cer.APPT.list(myAPPT.list,class.var="pathogenicity",which.columns=NULL,nummin=2,maxngaps=10)
my.cer
# cer.APPT.list#
setGeneric("cer.APPT.list",function(x,class.var,which.columns=NULL,maxngaps=0,nummin=2,...) standardGeneric("cer.APPT.list"))#
setMethod("cer.APPT.list", c("APPT.list","character","ANY","ANY","ANY"),#
          function(x, class.var,which.columns=NULL,maxngaps=0,nummin=2,...)#
{#
	#
	####	CONDITIONAL ENTROPY REDUCTION		#####
	prop<-properties(x)#
	if (is.null(which.columns)){which.columns<-columns(x)}#
#
	ma.red<-alignment(x)[,which.columns]#
	#
	col2anal<-which(unlist(lapply(apply(ma.red,2,table),length))>=nummin & apply(ma.red,2,function(x)return(sum(x=="-")))<=maxngaps)#
#
	####	CONDITIONAL ENTROPY	#####
	ce<-function(x,y){#
		t1<-table(y,x)/length(x)#
		px<-apply(t1,2,sum)#
		return( -sum(t(log(t(t1)/px,2))*t1,na.rm=TRUE))#
	}#
	#
	vect.ce<-vector("numeric")#
	hy<-vector("numeric")#
	for(i in 1:length(col2anal)){#
		vect.ce[i]<-ce(pheno(x)[,class.var],toupper(ma.red[,col2anal[i]]))#
		hy[i]<- -sum((table(ma.red[,col2anal[i]])/dim(ma.red)[1])*(log(table(ma.red[,col2anal[i]])/dim(ma.red)[1],2)))#
	}#
	#
	reduct.H<- 100*(1-vect.ce/hy)#
	names(reduct.H)<-colnames(ma.red[,col2anal])#
	reduct.H		#
})
my.cer<-cer.APPT.list(myAPPT.list,class.var="pathogenicity")
my.cer<-cer.APPT.list(myAPPT.list,class.var="pathogenicity",maxngaps=10,nummin=2)
library(LUCIA)
data("rpoS")
?bootMCMCglmm.APPT.list
library(LUCIA)
citation("LUCIA")
vignette("LUCIA")
library(bcool)
vignette("bcool")
citation("bcool")
?MCMCglmm.APPT.list
library(ape)
library(MCMCglmm)
library(doMPI)
library(bcool)
?require
library(bcool)
vignette("bcool")
library(bcool)
vignette("bcool")
library(bcool)
vignette("bcool")
library(bcool)
vignette(bcool)
vignette("bcool")
library(bcool)
help("INSTALL")
points(1:10,2:11)
#################################
# Lessa analysis for the paper ##
#################################
library(MCMCglmm)#
library(bmaMCMCanalysis)#
suppressMessages(library(doMPI))#
cl <- startMPIcluster(count=8,comm=7)#
registerDoMPI(cl)#
#
# phenotipic data#
# Test the bivariate regression of functional morphological characters#
# on behavioural characters#
#
# Functional morphological characters #
proc<-c(115.3,81.8,89.6,88,118,94.6,102.4,NA)#
proc.cat<-c(4,1,1,1,4,2,3,1)#
#
mandibular<-c(1.93,1.60,1.59,1.43,2.38,2.38,2.06,NA)#
mandibular.cat<-c(2,1,1,1,3,3,3,1)#
#
epicondylar<-c(0.29,0.25,0.24,0.22,0.31,0.32,0.32,NA)#
epicondylar.cat<-c(2,1,1,1,2,2,2,1)#
#
olecranon<-c(0.19,0.13,0.13,0.13,0.19,0.20,0.21,NA)#
olecranon.cat<-c(2,1,1,1,2,2,2,1)#
#
cross.section<-c(0.40,0.31,0.43,0.47,0.78,0.85,0.59,NA)#
cross.section.cat<-c(1,1,1,1,3,3,2,1)#
#
#
#
# Behavioural characters (independent variables)#
burrow<-c(5,3,2,3,5,5,5,1)#
fossoriality<-c(2,1,1,1,2,2,2,0)#
#
species<-c("Scyanus","Odegus","Ogliroides","Tbarrerae","Cleucodon","Caustralis","Ctalarum","Tapereoides")#
# Dataframe with both kind of data and also animal identification#
mydata<-data.frame("proc"=proc,"proc.cat"=proc.cat,"mandibular"=mandibular,"mandibular.cat"=mandibular.cat,"burrow"=burrow,"fossoriality"=fossoriality,"epicondylar"=epicondylar,"epicondylar.cat"=epicondylar.cat,"olecranon"=olecranon,"olecranon.cat"=olecranon.cat,"cross.section"=cross.section,"cross.section.cat"=cross.section.cat,"animal"=species)#
#mydata<-mydata[-8,]#
#
# Tree data: 10001 trees#
#myLtrees<-read.nexus("7cytb_aln.fa.trees")#
#
# three Trees: MtMam+I+G,MtMam+G,MtMam+I #
myLtrees<-read.nexus("8cytb.MtMam.tree")#
rooted.trees<-myLtrees#
for(i in 1:length(myLtrees)){rooted.trees[[i]]<-root(myLtrees[[i]],outgroup="Tapereoide",resolve.root=TRUE)}#
#
#
#
set.seed(123456)#
# sampling of trees#
#sampledTrees<-myLtrees[sample(1:length(myLtrees),1000)]#
#
# MCMC runs#
titi<-list()#
#
titi<-foreach(i=1:length(rooted.trees),.packages=c('ape','MCMCglmm'))%dopar%{#
       prior=list(R=list(V=diag(2),nu=2), G=list(G1=list(V=diag(2), nu=2))) #
       #rooted.tree<-(sampledTrees[[i]])#
       MCMCglmm(cbind(proc.cat,mandibular.cat)~trait:burrow,data=mydata,random=~us(trait):animal,family=c("gaussian","gaussian"),rcov=~us(trait):units,pedigree=myLtrees[[i]],prior=prior,nitt=1.1e7,burnin=1e6,thin=1000,verbose=FALSE)#
       #
}
install.packages("methods")
install.packages("methods",dependencies=T)
library(bcool)
library(methods)
basic
basic()
base
base()
?base
library(help="base")
ls()
library(MCMCglmm)#
library(bmaMCMCanalysis)#
setwd("/Users/kangu/Lucia/bmaMMCanalysis/ejemploDumont/dumont_revisited")#
#
####################
# phenotipic data ##
####################
# La recombination rate se puede medir como: num. de informative meiosis (meiosis)#
gen.map.len<-c(3615,2013,2275,1361,1542,2770,3300,3884,2286,3588,3160,829,644,NA)#
num.marker<-c(5136,352,326,6336,3824,742,253,2773,1042,1015,3960,64,83,NA)#
ave.marker<-c(0.704,5.719,6.979,0.215,0.403,3.733,13.043,1.401,2.194,3.535,0.798,12.953,7.759,NA)#
meiosis<-c(880,865,NA,92,41.4,71.8,82,297,78,128,203,353,162,NA)#
# O tambien como: gen.map.len/physical.gen.len#
phys.gen.len<-c(3191,3100,3100,2600,2800,2700,3000,2500,3000,3000,3000,3700,3500,NA)#
rec.rate<-c(1.133,0.649,0.734,0.523,0.551,1.026,1.100,1.554,0.762,1.196,1.053,0.224,0.184,NA)#
chak.cor<-c(1.144,0.732,0.838,0.527,0.747,1.128,1.348,1.604,0.796,1.260,1.070,0.309,0.228,NA)#
#
Y<-cbind(gen.map.len,num.marker,ave.marker,meiosis,phys.gen.len,rec.rate,chak.cor)#
Y<-as.data.frame(Y)#
colnames(Y)<-c("gen.map.len","num.marker","ave.marker","meiosis","phys.gen.len","rec.rate","chak.cor")#
Y$animal<-c("Human", "Baboon", "Macaque", "Mouse", "Rat", "Horse", "Cat", "Dog", "Pig", "Sheep", "Cow","Wallaby","Opossum","Platypus")#
#
# Tree data: 10001 trees#
mytrees<-read.nexus("CFTR_dumont_revision.trees")#
# sampling of trees#
sampledTrees<-mytrees[sample(1:length(mytrees),1000)]#
##############
# MCMC runs
library(MCMCglmm)#
library(bmaMCMCanalysis)#
setwd("/Users/kangu/Lucia/bmaMMCanalysis/ejemploDumont/dumont_revisited/")#
#
####################
# phenotipic data ##
####################
# La recombination rate se puede medir como: num. de informative meiosis (meiosis)#
gen.map.len<-c(3615,2013,2275,1361,1542,2770,3300,3884,2286,3588,3160,829,644,NA)#
num.marker<-c(5136,352,326,6336,3824,742,253,2773,1042,1015,3960,64,83,NA)#
ave.marker<-c(0.704,5.719,6.979,0.215,0.403,3.733,13.043,1.401,2.194,3.535,0.798,12.953,7.759,NA)#
meiosis<-c(880,865,NA,92,41.4,71.8,82,297,78,128,203,353,162,NA)#
# O tambien como: gen.map.len/physical.gen.len#
phys.gen.len<-c(3191,3100,3100,2600,2800,2700,3000,2500,3000,3000,3000,3700,3500,NA)#
rec.rate<-c(1.133,0.649,0.734,0.523,0.551,1.026,1.100,1.554,0.762,1.196,1.053,0.224,0.184,NA)#
chak.cor<-c(1.144,0.732,0.838,0.527,0.747,1.128,1.348,1.604,0.796,1.260,1.070,0.309,0.228,NA)#
#
Y<-cbind(gen.map.len,num.marker,ave.marker,meiosis,phys.gen.len,rec.rate,chak.cor)#
Y<-as.data.frame(Y)#
colnames(Y)<-c("gen.map.len","num.marker","ave.marker","meiosis","phys.gen.len","rec.rate","chak.cor")#
Y$animal<-c("Human", "Baboon", "Macaque", "Mouse", "Rat", "Horse", "Cat", "Dog", "Pig", "Sheep", "Cow","Wallaby","Opossum","Platypus")#
#
# Tree data: 10001 trees#
mytrees<-read.nexus("CFTR_dumont_revision.trees")#
# sampling of trees#
sampledTrees<-mytrees[sample(1:length(mytrees),1000)]#
##############
# MCMC runs
library("bcool")
?phylo
?"phylo-class"
??"phylo-class"
library("bcool")
vignette("bcool")
setwd("Lucia/bcool/inst/doc/")
Sweave("bcool.Snw")
