#Test of Linkage disequilibrium in autotetraploid species #To be used with Splus or R # To create the LD4x function, write in Splus or R : source("LD4X_revised2013.prog") # To use the programm, type for example: LD4X(data="inputLD.txt") # or LD4X(data="C:/Mydirectory/Myproject/inputLD.txt") # to analyse the input data file inputLD.txt LD4X<-function(data) { # import the input texte file in Splus inputp<-read.table(data,head=T) #initialisation of output file resutot<-data.frame(t(rep(NA,9))) names(resutot)<-c("population","marker1","allele_m1","freq_all_m1","marker2", "allele_m2","freq_all_m2","p_of_ftest","p_bonferroni") #To ask the user how many and which populations he wants to study print("How many populations do you want to analyse ?") np<-scan("",n=1) print("Enter the code of each population you want to analyse") print("Press Return after each population") pop<-scan("",n=np) #To give information to the user on the progress for (p in 1:np) { print("You are analysing population no:") print(pop[p]) input<-inputp[inputp$Population==pop[p],] print("The dimensions of the dataset for this population are: ") print(dim(input)) print("names of columns of the input file") print(names(input)) #Number of markers nm<-(dim(input)[2]-1)/4 print("Number of markers") print(nm) #Calculation of allelic frequencies #initialisation of output file frequ<-data.frame(matrix((rep(NA,nm*max(input[,-c(1)],na.rm=T))),,nm)) names(frequ)<-names(input)[seq(2,dim(input)[2],by=4)] for (mk in 1:nm) { for (all in 1:max(input[,-c(1)],na.rm=T)) { fre<-input[,c((4*mk-2):(4*mk+1))] fr<-c(fre[,1],fre[,2],fre[,3],fre[,4]) fre2<-fre[fre==all] fre3<-fre2[!is.na(fre2)] fr2<-fr[!is.na(fr)] fr2<-fr2[fr2!=0] f<-length(fre3)/(length(fr2)) frequ[all,mk]<-f } } #Test of linkage disequilibrium #A file "bid" is built with the information (False or True) of presence or absence of the allele #for the pair of markers studied, for each individual and each allelic position #then a 5x5 contingency table is filled with the number of individuals in each configuration. This table is submitted to a Fisher exact test m1<-1 m2<-2 for (m1 in 1:(nm-1)) { for (m2 in (m1+1):nm) { print("The program is working on markers : ") print(names(input)[4*m1-2]);print(names(input)[4*m2-2]) # total number of alleles per marker nt1<-max(input[,(4*m1-2):(4*m1+1)],na.rm=T) nt2<-max(input[,(4*m2-2):(4*m2+1)],na.rm=T) # All alleles of each marker are treated by pairs a1<-1 a2<-1 for (a1 in 1:nt1) { for (a2 in 1:nt2) { bid<-NULL bid<-cbind(input[,(4*m1-2):(4*m1+1)]==c(a1,a1,a1,a1), input[,(4*m2-2):(4*m2+1)]==c(a2,a2,a2,a2)) bidtri<-bid #Sort the F and T for each individual for (i in 1:dim(input)[1]) bidtri[i,1:4]<-bid[i,1:4][order(bid[i,1:4])] for (i in 1:dim(input)[1]) bidtri[i,5:8]<-bid[i,5:8][order(bid[i,5:8])] #the file is transformed into a data frame bidtri<-data.frame(bidtri) #The contingency table is named: conting conting<-matrix(rep(NA,25),5,5) conting[1,1]<-dim(bidtri[bidtri[,1]==T&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==T&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[1,2]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==T&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[1,3]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==T&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[1,4]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==T &bidtri[,5]==T&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[1,5]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==F &bidtri[,5]==T&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[2,1]<-dim(bidtri[bidtri[,1]==T&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[2,2]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[2,3]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[2,4]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[2,5]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==F &bidtri[,5]==F&bidtri[,6]==T&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[3,1]<-dim(bidtri[bidtri[,1]==T&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[3,2]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[3,3]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[3,4]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[3,5]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==F &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==T&bidtri[,8]==T,])[1] conting[4,1]<-dim(bidtri[bidtri[,1]==T&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==T,])[1] conting[4,2]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==T,])[1] conting[4,3]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==T,])[1] conting[4,4]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==T,])[1] conting[4,5]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==F &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==T,])[1] conting[5,1]<-dim(bidtri[bidtri[,1]==T&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==F,])[1] conting[5,2]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==T&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==F,])[1] conting[5,3]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==T&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==F,])[1] conting[5,4]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==T &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==F,])[1] conting[5,5]<-dim(bidtri[bidtri[,1]==F&bidtri[,2]==F&bidtri[,3]==F&bidtri[,4]==F &bidtri[,5]==F&bidtri[,6]==F&bidtri[,7]==F&bidtri[,8]==F,])[1] #Contingency Fisher test f<-fisher.test(conting, workspace = 5000000) #Bonferroni correction #calculation of the number of comparisons between pairs of markers nc<-length(frequ[,m1][(frequ[,m1]>0)])*length(frequ[,m2][(frequ[,m2]>0)]) #the ouput "resu" contains the population number, the name of a marker, # the name of the alleles of this marker, the frequency of this allele, # the name of another marker, the names of the alleles of this other marker, # the frequency of this allele, the probability of the Fisher exact test and # the threshold for the test after a Bonferroni correction resu<-c(pop[p],names(input)[1+4*m1-3],a1,frequ[a1,m1],names(input)[1+4*m2-3],a2,frequ[a2,m2], f$p.value,0.05/(nc/2)) #the file "resutot" contains the results for each pair of alleles resutot<-rbind(resutot,resu) } } } } } # remove the first line of resutot filled with NA's resutot<-resutot[-c(1),] row.names(resutot)<-c(1:dim(resutot)[1]) # To export this file in a text format write.table(resutot,"outputLD.txt") }