introduction.Rmd
McCOILR is simply an Rcpp implementation of [THEREALMcCOIL] (https://github.com/Greenhouse-Lab/THEREALMcCOIL), which was written solely to make running the software easier within the cluster framework I use. All rights refer to the writers of the original c code.
The package can be installed as follows, assuming devtools has been installed.
## first let's install the package
# devtools::install_github("OJWatson/McCOILR")
## Load the package
library(McCOILR)
The package carries out the same 2 R functions as before, which are demonstrated below:
## categorical test
# read in demo data and view it
data0 = read.table(system.file("extdata","cat_input_test.txt",package="McCOILR"), head=T)
data=data0[,-1]
rownames(data)=data0[,1]
# view the heterozygosity calls
head(data)
## site1 site2 site3 site4 site5 site6 site7 site8 site9 site10 site11 site12
## ind1 1.0 0.5 1 0 1 0.0 -1 -1.0 -1.0 0 1.0 0
## ind2 0.0 0.5 1 1 1 0.0 -1 0.0 -1.0 0 0.5 -1
## ind3 0.5 0.0 1 1 -1 0.0 1 0.5 -1.0 0 0.5 0
## ind4 0.5 0.0 1 -1 1 0.0 1 0.0 -1.0 0 1.0 0
## ind5 0.5 0.0 1 1 1 0.0 1 0.5 0.5 0 0.5 0
## ind6 -1.0 0.0 1 -1 -1 0.5 -1 0.0 -1.0 0 1.0 -1
## site13 site14 site15 site16 site17 site18 site19 site20 site21 site22
## ind1 1.0 1.0 -1 1 0 1 0 -1 0.5 -1.0
## ind2 0.5 1.0 -1 1 -1 1 -1 -1 0.0 1.0
## ind3 0.5 0.0 -1 1 0 1 -1 -1 0.0 1.0
## ind4 0.0 1.0 -1 1 0 1 0 1 0.0 1.0
## ind5 0.5 0.5 0 1 -1 1 -1 1 0.5 0.5
## ind6 -1.0 -1.0 -1 1 -1 -1 -1 -1 -1.0 1.0
## site23 site24 site25 site26 site27 site28 site29 site30 site31 site32
## ind1 1 0.5 1 -1.0 -1.0 -1.0 1 0.5 1 1
## ind2 1 0.5 1 -1.0 0.0 -1.0 1 0.5 1 1
## ind3 1 0.5 1 -1.0 1.0 1.0 1 0.5 1 1
## ind4 1 1.0 1 0.5 1.0 0.5 1 1.0 1 1
## ind5 1 1.0 1 0.5 0.5 0.5 1 0.0 1 1
## ind6 1 -1.0 1 -1.0 -1.0 -1.0 1 0.5 -1 -1
## site33 site34 site35 site36 site37 site38 site39 site40 site41 site42
## ind1 1 0.5 1 -1.0 1 0.0 0 -1 0 -1.0
## ind2 1 0.5 0 -1.0 -1 0.5 1 -1 -1 -1.0
## ind3 1 0.0 1 0.0 -1 0.5 0 1 -1 -1.0
## ind4 1 0.0 1 0.0 -1 0.5 0 -1 0 -1.0
## ind5 1 1.0 1 0.5 1 0.0 0 0 0 0.5
## ind6 -1 1.0 -1 -1.0 -1 -1.0 0 -1 -1 -1.0
## site43 site44 site45 site46 site47 site48 site49 site50 site51 site52
## ind1 0 1 1.0 -1 0.5 0 0.5 -1 0 0.5
## ind2 0 1 1.0 -1 1.0 0 -1.0 0 0 1.0
## ind3 0 1 0.5 -1 1.0 0 -1.0 0 0 -1.0
## ind4 0 1 0.0 -1 0.5 0 1.0 0 0 0.0
## ind5 0 1 0.5 0 0.5 0 1.0 0 0 0.5
## ind6 0 1 -1.0 -1 -1.0 -1 -1.0 0 0 -1.0
## site53 site54 site55 site56 site57 site58 site59 site60 site61 site62
## ind1 0 0.0 0.5 1 1 -1 1.0 -1 0.0 1
## ind2 0 0.5 0.0 1 1 -1 0.5 -1 0.0 1
## ind3 0 0.0 0.0 -1 1 -1 0.5 -1 0.0 1
## ind4 0 0.0 0.0 1 1 1 1.0 1 0.0 1
## ind5 0 0.5 0.0 1 1 1 0.5 1 0.0 1
## ind6 0 -1.0 0.0 -1 1 -1 -1.0 -1 0.5 1
## site63 site64 site65 site66 site67 site68 site69 site70 site71 site72
## ind1 0 1.0 -1.0 1 1 -1 0.0 0.0 0 1
## ind2 0 1.0 -1.0 1 -1 0 0.0 0.5 0 -1
## ind3 0 1.0 0.5 1 -1 0 0.5 1.0 0 -1
## ind4 0 1.0 -1.0 1 1 1 0.0 0.0 0 -1
## ind5 0 1.0 0.0 1 1 -1 0.0 0.0 0 1
## ind6 0 0.5 -1.0 -1 -1 0 -1.0 0.5 -1 -1
## site73 site74 site75 site76 site77 site78 site79 site80 site81 site82
## ind1 0 -1 1 -1 0.0 0.5 0.0 0 0.0 0.0
## ind2 0 -1 1 -1 -1.0 0.5 1.0 -1 0.5 0.0
## ind3 0 -1 1 -1 -1.0 0.5 0.5 -1 0.0 0.5
## ind4 0 1 0 -1 1.0 1.0 1.0 0 0.0 0.0
## ind5 0 -1 1 1 0.5 -1.0 0.5 -1 0.0 0.0
## ind6 0 -1 1 -1 -1.0 0.0 0.0 -1 0.0 -1.0
## site83 site84 site85 site86 site87 site88 site89 site90 site91 site92
## ind1 0.5 0 0 0 0 1 1 0 0 0
## ind2 -1.0 0 0 -1 0 1 -1 -1 -1 0
## ind3 -1.0 0 0 -1 0 1 -1 0 -1 0
## ind4 1.0 0 0 0 0 1 1 0 -1 0
## ind5 0.5 0 0 0 0 1 1 0 0 0
## ind6 -1.0 0 -1 -1 0 -1 -1 -1 -1 0
## site93 site94 site95 site96 site97 site98 site99 site100 site101 site102
## ind1 -1.0 1 0 1 0.5 -1.0 -1 -1 0.0 1
## ind2 -1.0 1 0 1 0.0 1.0 0 1 0.0 1
## ind3 -1.0 1 0 1 1.0 0.5 0 1 0.0 1
## ind4 0.5 1 0 1 1.0 1.0 0 1 0.0 1
## ind5 0.0 1 0 1 0.5 0.5 -1 -1 0.0 0
## ind6 -1.0 1 -1 1 1.0 -1.0 0 1 0.5 -1
## site103 site104 site105
## ind1 1 -1 -1
## ind2 1 -1 -1
## ind3 1 -1 -1
## ind4 1 0 -1
## ind5 1 0 1
## ind6 -1 0 -1
# create results directory and run analysis
dir.create(path = "cat_output")
out_cat <- McCOIL_categorical(data,maxCOI=25, threshold_ind=20, threshold_site=20,
totalrun=1000, burnin=100, M0=15, e1=0.05, e2=0.05,
err_method=3, path="cat_output", output="output_test.txt" )
## Time = 1.00 s
## proportional test
# read in demo data and view it
dataA1i = read.table(system.file("extdata","prop_dataA1_test.txt",package="McCOILR"), head=T)
dataA2i = read.table(system.file("extdata","prop_dataA2_test.txt",package="McCOILR"), head=T)
dataA1= dataA1i[,-1]
dataA2= dataA2i[,-1]
rownames(dataA1)= dataA1i[,1]
rownames(dataA2)= dataA2i[,1]
# view the read counts
head(dataA1)
## site1 site2 site3 site4 site5 site6 site7 site8
## ind1 0.773019 0.435251 6.523575 5.529429 10.301879 0.635886 0.000000 4.634959
## ind2 0.997224 0.336538 0.000000 0.254333 0.642381 0.535586 0.061059 6.542795
## ind3 4.476989 1.684320 2.130197 4.353612 0.524203 0.249656 0.080993 2.019209
## ind4 7.815435 3.313494 12.697070 8.285221 3.011241 1.013239 0.037105 6.032118
## ind5 7.931132 0.442517 0.167996 2.177373 0.587036 0.300895 0.030904 1.024954
## ind6 2.458607 0.638248 3.887696 6.067082 9.065840 0.905267 0.081333 4.586606
## site9 site10 site11 site12 site13 site14 site15 site16
## ind1 0.195470 13.927895 2.363394 5.382498 3.024445 5.423714 0.000000 7.219236
## ind2 0.039037 0.000000 -1.000000 0.754422 3.324416 0.255289 0.001773 0.153461
## ind3 0.000000 11.818180 0.069298 0.673240 3.098849 1.631299 0.030687 2.452794
## ind4 0.000000 8.047423 0.000000 7.221847 2.497320 3.533402 0.000000 11.100150
## ind5 0.081224 6.033347 -1.000000 1.568639 1.279992 0.163047 0.159506 3.586462
## ind6 0.311690 7.699633 2.643740 2.744867 3.400377 6.825433 0.000000 5.841971
## site17 site18 site19 site20 site21 site22 site23 site24
## ind1 0.000000 0.321880 1.035684 0.000000 0 0 0.000000 6.416304
## ind2 0.000000 0.191307 0.460990 0.000000 0 -1 -1.000000 0.000000
## ind3 0.000000 0.459942 0.837007 0.229977 0 0 0.000000 4.429880
## ind4 0.161285 0.703449 1.271100 0.000000 0 0 0.103252 7.916089
## ind5 0.064790 0.190289 0.379542 0.000000 0 0 0.000000 1.037307
## ind6 0.176968 0.372723 1.110334 0.000000 0 0 0.000000 8.487780
## site25 site26 site27 site28 site29 site30 site31
## ind1 16.064404 0.197101 10.157699 0.212330 9.499981 9.835422 2.055015
## ind2 1.726730 0.190073 0.000000 -1.000000 -1.000000 4.855352 0.000000
## ind3 11.425260 0.232608 6.536019 0.312552 0.708249 8.368239 2.397373
## ind4 8.395237 0.035063 10.168160 0.157301 8.480841 6.388393 7.613878
## ind5 4.754813 0.007692 3.858720 0.184479 3.152692 5.864166 0.316654
## ind6 13.421972 0.144583 8.898299 0.268645 3.192409 9.806597 5.342317
## site32 site33 site34 site35 site36 site37 site38 site39
## ind1 5.926235 7.078459 0.000000 0.789787 0.815353 0.000000 0.000000 0.261659
## ind2 0.000000 0.384062 0.000000 0.330539 0.350350 0.218452 0.198614 0.646294
## ind3 9.008590 0.639089 0.019107 0.256107 0.391928 0.000000 0.157776 0.181682
## ind4 6.477691 2.964806 0.594425 1.565033 0.501707 0.476174 0.016512 2.729292
## ind5 0.477997 0.202001 0.000000 0.321333 0.275677 7.588091 -1.000000 0.005372
## ind6 7.105007 7.560561 0.000000 4.308457 0.590327 0.000000 0.000000 0.805857
## site40 site41 site42 site43 site44 site45 site46 site47
## ind1 6.073929 0.000000 0.000000 2.331914 0.000000 3.208030 1.332707 7.534703
## ind2 0.250292 1.099951 0.041389 2.158835 0.000000 3.220459 0.940977 7.443709
## ind3 1.581731 0.082319 0.000000 2.443111 0.000000 3.197772 0.273793 3.723208
## ind4 4.065335 0.599497 0.000000 10.687130 0.000000 4.860701 0.768297 10.746410
## ind5 2.848523 0.271334 0.000000 0.201186 2.899116 1.124923 0.000000 4.235002
## ind6 7.973652 0.000000 0.000000 5.407707 0.000000 3.657817 1.061532 6.957743
## site48 site49 site50 site51 site52 site53 site54 site55
## ind1 0.332472 8.247594 1.611010 0.211831 2.414855 0 2.047866 5.552769
## ind2 0.737532 8.433792 2.815520 -1.000000 0.073066 0 2.474965 8.368501
## ind3 0.288094 2.569323 4.338393 0.107921 0.256071 0 1.815254 2.822285
## ind4 0.663363 11.933540 6.475605 0.000000 5.014762 0 3.182412 9.823132
## ind5 0.380688 4.981566 3.503463 0.200261 -1.000000 0 2.672133 5.945422
## ind6 0.463582 6.632334 4.134599 0.333725 1.689286 0 3.011802 6.636636
## site56 site57 site58 site59 site60 site61 site62 site63
## ind1 3.515977 0.000000 5.617321 5.567360 5.889082 0.407831 4.735254 0.212227
## ind2 0.000000 0.000000 -1.000000 4.662254 6.840784 0.643242 0.000000 0.378215
## ind3 1.317744 2.633277 2.067533 3.208334 4.003303 0.183730 0.435801 0.000000
## ind4 5.101557 1.148384 4.601010 9.515062 7.887465 2.409367 2.435037 0.698486
## ind5 1.046215 0.000000 -1.000000 0.136660 4.014484 0.299023 0.272797 0.719106
## ind6 2.838735 0.488921 9.737784 8.436536 10.875579 0.642340 5.458116 0.009906
## site64 site65 site66 site67 site68 site69 site70 site71
## ind1 4.301725 0.086193 0.195682 0 3.923784 3.012980 0.084219 0
## ind2 0.000000 0.000000 1.024617 -1 4.139583 -1.000000 0.683016 0
## ind3 -1.000000 0.139899 0.000000 0 3.054424 0.778499 2.766324 0
## ind4 12.837560 0.430866 0.102573 0 5.653807 1.565004 4.702235 0
## ind5 0.000000 0.000000 0.494720 0 0.185001 0.102478 0.000000 0
## ind6 8.947218 0.184573 0.226434 0 8.391926 5.339827 0.406806 0
## site72 site73 site74 site75 site76 site77 site78
## ind1 4.481197 0.736700 7.174779 5.692942 0.117824 5.038420 -1.000000
## ind2 0.080532 0.740144 0.000000 -1.000000 0.180125 4.277511 0.000000
## ind3 2.581468 0.544233 4.176406 0.092368 0.000000 0.442165 0.040142
## ind4 1.237159 1.276951 12.122590 7.206281 0.157702 0.510834 0.000000
## ind5 -1.000000 1.091841 0.000000 -1.000000 0.000000 3.393604 -1.000000
## ind6 7.603121 1.407879 10.076725 9.867932 0.212253 3.497185 0.012627
## site79 site80 site81 site82 site83 site84 site85 site86
## ind1 1.222317 0.074298 2.925244 4.445084 1.221600 0.000000 2.218825 2.304839
## ind2 0.579069 2.361009 0.299759 0.000000 -1.000000 0.035173 0.000000 0.000000
## ind3 0.361243 0.021137 0.116697 3.949225 0.000000 0.664386 0.000000 0.000000
## ind4 2.107665 0.097928 0.167922 5.304122 0.142190 0.185392 1.107071 0.219858
## ind5 1.105395 0.273618 0.204405 1.739071 0.000000 0.125757 1.898828 0.036527
## ind6 1.730980 6.016359 0.184774 3.817465 0.528938 0.341512 2.321284 2.074875
## site87 site88 site89 site90 site91 site92 site93 site94
## ind1 0.000000 5.781570 0.731047 2.762129 2.193086 5.669109 4.948302 0.239401
## ind2 3.341576 1.042625 0.570464 1.512688 0.000000 0.206961 2.146975 0.000000
## ind3 2.694466 0.672151 2.886937 1.046025 2.864557 5.115930 3.521272 0.113925
## ind4 3.884768 4.873755 4.610479 3.401538 1.075722 6.348375 5.779720 0.564348
## ind5 4.599888 4.351057 0.915393 2.209481 0.402474 2.972046 3.850597 0.000000
## ind6 1.984991 5.338447 5.046478 2.476585 4.482983 9.222386 4.920970 3.745815
## site95 site96 site97 site98 site99 site100 site101 site102
## ind1 3.890739 0.88387 0.000000 2.115714 1.017555 0.133018 5.686296 3.520340
## ind2 0.000000 -1.00000 -1.000000 1.289655 0.000000 -1.000000 2.571777 0.482079
## ind3 0.505523 0.00000 2.245503 1.295489 1.101571 0.198458 2.507220 3.057959
## ind4 2.553972 0.00000 0.204393 3.732895 0.682663 0.219620 5.440795 4.996793
## ind5 0.269705 -1.00000 0.000000 2.773473 -1.000000 0.000000 3.429925 0.861018
## ind6 3.310597 0.00000 1.290676 3.431699 2.073178 0.273429 4.014097 3.320495
## site103 site104 site105 site106 site107
## ind1 1.448558 5.022188 2.554234 4.375710 0.00000
## ind2 -1.000000 3.015142 -1.000000 1.412338 -1.00000
## ind3 4.206432 2.439314 0.000000 2.534611 1.08814
## ind4 7.513086 2.248669 0.375817 5.752069 0.00000
## ind5 0.000000 0.024655 -1.000000 3.745864 0.00000
## ind6 5.106893 2.776721 3.485361 4.850756 0.00000
# create results directory and run analysis
dir.create(path="prop_output")
out_prop <- McCOIL_proportional(dataA1, dataA2, maxCOI=25, totalrun=5000, burnin=100,
M0=15, epsilon=0.02, err_method=3, path="prop_output",
output="output_test.txt" )
## Iter 500 out of 5000
## Iter 1000 out of 5000
## Iter 1500 out of 5000
## Iter 2000 out of 5000
## Iter 2500 out of 5000
## Iter 3000 out of 5000
## Iter 3500 out of 5000
## Iter 4000 out of 5000
## Iter 4500 out of 5000
## Iter 5000 out of 5000
## Time = 20.00 s
The R functions now return the summary outputs, just for ease of looking at the results:
## view summary data.frame for categorical
str(out_cat)
## 'data.frame': 90 obs. of 8 variables:
## $ file : chr "output_test.txt" "output_test.txt" "output_test.txt" "output_test.txt" ...
## $ CorP : chr "C" "C" "C" "C" ...
## $ name : chr "ind1" "ind2" "ind3" "ind4" ...
## $ mean : num 4 5 4 2 4 3 5 3 2 8 ...
## $ median : num 2 2 2 1 3 3 3 3 2 5 ...
## $ sd : num 5 6.33 3.84 4.76 2.83 ...
## $ quantile0.025: num 2 2 2 1 2 2 2 2 1 3 ...
## $ quantile0.975: num 20 23 18 20 14 ...
## Have a look at the categorical output COI distribution
hist(as.numeric(as.character(out_cat$mean[out_cat$CorP=="C"])),
main = "Categorical Mean COI", xlab="COI")
## view summary data.frame for proportional
str(out_prop)
## 'data.frame': 144 obs. of 8 variables:
## $ file : chr "output_test.txt" "output_test.txt" "output_test.txt" "output_test.txt" ...
## $ CorP : chr "C" "C" "C" "C" ...
## $ name : chr "ind1" "ind2" "ind3" "ind4" ...
## $ mean : num 2 2 2 6 2 4 8 2 2 2 ...
## $ median : num 2 2 2 6 2 4 8 2 2 2 ...
## $ sd : num 0.3149 0.2021 0.0669 1.2659 0.4597 ...
## $ quantile0.025: num 2 2 2 4 2 3 5 2 2 2 ...
## $ quantile0.975: num 3 3 2 9 3 5 13 2 2 3 ...
## Have a look at the proportional output COI distribution
hist(as.numeric(as.character(out_prop$mean[out_cat$CorP=="C"])),
main = "Proportional Mean COI", xlab="COI")