Run SNMF from R in tidypopgen
Usage
gt_snmf(
x,
k,
project = "continue",
n_runs = 1,
alpha,
tolerance = 1e-05,
entropy = FALSE,
percentage = 0.05,
I,
iterations = 200,
ploidy = 2,
seed = -1
)
Arguments
- x
a gen_tibble or a character giving the path to the input geno file
- k
an integer giving the number of clusters
- project
one of "continue", "new", and "force": "continue" stores files in the current project, "new" creates a new project, and "force" stores results in the current project even if the .geno input file has been altered.
- n_runs
the number of runs for each k value (defaults to 1)
- alpha
numeric snmf regularization parameter. See LEA::snmf for details
- tolerance
numeric value of tolerance (default 0.00001)
- entropy
boolean indicating whether to estimate cross-entropy
- percentage
numeric value indicating percentage of masked genotypes, ranging between 0 and 1, to be used when entropy = TRUE
- I
number of SNPs for initialising the snmf algorithm
- iterations
numeric integer for maximum iterations (default 200)
- ploidy
the ploidy of the input data (defaults to 2)
- seed
the seed for the random number generator
Value
an object of class gt_admix
consisting of a list with the following
elements:
k
the number of clusters
Q
a matrix with the admixture proportions
P
a matrix with the allele frequencies
log
a log of the output generated by SNMF (usually printed on the screen when running from the command line)
cv
the masked cross-entropy (if entropy is TRUE)
loglik
the log likelihood of the model
id
the id column of the input gen_tibble (if applicable)
group
the group column of the input gen_tibble (if applicable)
Examples
example_gt <- example_gt("gen_tbl")
# To run SNMF on a gen_tibble:
example_gt %>% gt_snmf(
k = 1:3, project = "force", entropy = TRUE,
percentage = 0.5, n_runs = 1, seed = 1, alpha = 100
)
#> $k
#> [1] 1 2 3
#>
#> $Q
#> $Q[[1]]
#> .Q1
#> [1,] 1
#> [2,] 1
#> [3,] 1
#> [4,] 1
#> [5,] 1
#> [6,] 1
#> [7,] 1
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $Q[[2]]
#> .Q1 .Q2
#> [1,] 0.000274926 0.999725000
#> [2,] 0.000100000 0.999900000
#> [3,] 0.000100000 0.999900000
#> [4,] 0.999900000 0.000100000
#> [5,] 0.999742000 0.000257853
#> [6,] 0.999719000 0.000281284
#> [7,] 0.000317214 0.999683000
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $Q[[3]]
#> .Q1 .Q2 .Q3
#> [1,] 0.000240022 0.999520000 0.000240022
#> [2,] 0.000222869 0.999554000 0.000222869
#> [3,] 0.000222869 0.999554000 0.000222869
#> [4,] 0.999538000 0.000231010 0.000231010
#> [5,] 0.999800000 0.000099990 0.000099990
#> [6,] 0.000275505 0.000275505 0.999449000
#> [7,] 0.000275525 0.000275525 0.999449000
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#>
#> $P
#> list()
#>
#> $log
#> [1] "The project is saved into :"
#> [2] " "
#> [3] ""
#> [4] "To load the project, use:"
#> [5] " project = load.snmfProject(\"\")"
#> [6] ""
#> [7] "To remove the project, use:"
#> [8] " remove.snmfProject(\"\")"
#> [9] ""
#> [10] "[1] 1"
#> [11] "[1] \"*************************************\""
#> [12] "[1] \"* create.dataset *\""
#> [13] "[1] \"*************************************\""
#> [14] "summary of the options:"
#> [15] ""
#> [16] " -n (number of individuals) 7"
#> [17] " -L (number of loci) 6"
#> [18] " -s (seed random init) 1"
#> [19] " -r (percentage of masked data) 0.5"
#> [20] " -x (genotype file in .geno format) /tmp/RtmpyN1xne/file22a2345f1541.geno"
#> [21] " -o (output file in .geno format) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [22] ""
#> [23] " Write genotype file with masked data, /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."
#> [24] ""
#> [25] "[1] \"*************************************\""
#> [26] "[1] \"* sNMF K = 1 repetition 1 *\""
#> [27] "[1] \"*************************************\""
#> [28] "summary of the options:"
#> [29] ""
#> [30] " -n (number of individuals) 7"
#> [31] " -L (number of loci) 6"
#> [32] " -K (number of ancestral pops) 1"
#> [33] " -x (input file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [34] " -q (individual admixture file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q"
#> [35] " -g (ancestral frequencies file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G"
#> [36] " -i (number max of iterations) 200"
#> [37] " -a (regularization parameter) 100"
#> [38] " -s (seed random init) 94308891885569"
#> [39] " -e (tolerance error) 1E-05"
#> [40] " -p (number of processes) 1"
#> [41] " - diploid"
#> [42] ""
#> [43] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."
#> [44] ""
#> [45] ""
#> [46] "Main algorithm:"
#> [47] ""
#> [48] "Least-square error: 13.714286"
#> [49] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q:\t\tOK."
#> [50] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G:\tOK."
#> [51] ""
#> [52] "[1] \"*************************************\""
#> [53] "[1] \"* cross-entropy estimation *\""
#> [54] "[1] \"*************************************\""
#> [55] "summary of the options:"
#> [56] ""
#> [57] " -n (number of individuals) 7"
#> [58] " -L (number of loci) 6"
#> [59] " -K (number of ancestral pops) 1"
#> [60] " -x (genotype file) /tmp/RtmpyN1xne/file22a2345f1541.geno"
#> [61] " -q (individual admixture) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q"
#> [62] " -g (ancestral frequencies) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G"
#> [63] " -i (with masked genotypes) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [64] " - diploid"
#> [65] ""
#> [66] "Cross-Entropy (all data):\t 0.706731"
#> [67] "Cross-Entropy (masked data):\t 4.42151"
#> [68] "The project is saved into :"
#> [69] " "
#> [70] ""
#> [71] "To load the project, use:"
#> [72] " project = load.snmfProject(\"\")"
#> [73] ""
#> [74] "To remove the project, use:"
#> [75] " remove.snmfProject(\"\")"
#> [76] ""
#> [77] "[1] \"*************************************\""
#> [78] "[1] \"* sNMF K = 2 repetition 1 *\""
#> [79] "[1] \"*************************************\""
#> [80] "summary of the options:"
#> [81] ""
#> [82] " -n (number of individuals) 7"
#> [83] " -L (number of loci) 6"
#> [84] " -K (number of ancestral pops) 2"
#> [85] " -x (input file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [86] " -q (individual admixture file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q"
#> [87] " -g (ancestral frequencies file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G"
#> [88] " -i (number max of iterations) 200"
#> [89] " -a (regularization parameter) 100"
#> [90] " -s (seed random init) 8589934593"
#> [91] " -e (tolerance error) 1E-05"
#> [92] " -p (number of processes) 1"
#> [93] " - diploid"
#> [94] ""
#> [95] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."
#> [96] ""
#> [97] ""
#> [98] "Main algorithm:"
#> [99] "\t[ ]"
#> [100] "\t[======]"
#> [101] "Number of iterations: 16"
#> [102] ""
#> [103] "Least-square error: 14.000528"
#> [104] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q:\t\tOK."
#> [105] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G:\tOK."
#> [106] ""
#> [107] "[1] \"*************************************\""
#> [108] "[1] \"* cross-entropy estimation *\""
#> [109] "[1] \"*************************************\""
#> [110] "summary of the options:"
#> [111] ""
#> [112] " -n (number of individuals) 7"
#> [113] " -L (number of loci) 6"
#> [114] " -K (number of ancestral pops) 2"
#> [115] " -x (genotype file) /tmp/RtmpyN1xne/file22a2345f1541.geno"
#> [116] " -q (individual admixture) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q"
#> [117] " -g (ancestral frequencies) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G"
#> [118] " -i (with masked genotypes) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [119] " - diploid"
#> [120] ""
#> [121] "Cross-Entropy (all data):\t 0.69943"
#> [122] "Cross-Entropy (masked data):\t 4.79389"
#> [123] "The project is saved into :"
#> [124] " "
#> [125] ""
#> [126] "To load the project, use:"
#> [127] " project = load.snmfProject(\"\")"
#> [128] ""
#> [129] "To remove the project, use:"
#> [130] " remove.snmfProject(\"\")"
#> [131] ""
#> [132] "[1] \"*************************************\""
#> [133] "[1] \"* sNMF K = 3 repetition 1 *\""
#> [134] "[1] \"*************************************\""
#> [135] "summary of the options:"
#> [136] ""
#> [137] " -n (number of individuals) 7"
#> [138] " -L (number of loci) 6"
#> [139] " -K (number of ancestral pops) 3"
#> [140] " -x (input file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [141] " -q (individual admixture file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q"
#> [142] " -g (ancestral frequencies file) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G"
#> [143] " -i (number max of iterations) 200"
#> [144] " -a (regularization parameter) 100"
#> [145] " -s (seed random init) 94308891885569"
#> [146] " -e (tolerance error) 1E-05"
#> [147] " -p (number of processes) 1"
#> [148] " - diploid"
#> [149] ""
#> [150] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."
#> [151] ""
#> [152] ""
#> [153] "Main algorithm:"
#> [154] "\t[ ]"
#> [155] "\t[======]"
#> [156] "Number of iterations: 16"
#> [157] ""
#> [158] "Least-square error: 8.583557"
#> [159] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q:\t\tOK."
#> [160] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G:\tOK."
#> [161] ""
#> [162] "[1] \"*************************************\""
#> [163] "[1] \"* cross-entropy estimation *\""
#> [164] "[1] \"*************************************\""
#> [165] "summary of the options:"
#> [166] ""
#> [167] " -n (number of individuals) 7"
#> [168] " -L (number of loci) 6"
#> [169] " -K (number of ancestral pops) 3"
#> [170] " -x (genotype file) /tmp/RtmpyN1xne/file22a2345f1541.geno"
#> [171] " -q (individual admixture) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q"
#> [172] " -g (ancestral frequencies) /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G"
#> [173] " -i (with masked genotypes) /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"
#> [174] " - diploid"
#> [175] ""
#> [176] "Cross-Entropy (all data):\t 0.349945"
#> [177] "Cross-Entropy (masked data):\t 5.78159"
#> [178] "The project is saved into :"
#> [179] " "
#> [180] ""
#> [181] "To load the project, use:"
#> [182] " project = load.snmfProject(\"\")"
#> [183] ""
#> [184] "To remove the project, use:"
#> [185] " remove.snmfProject(\"\")"
#> [186] ""
#> [187] "snmf Project"
#> [188] ""
#> [189] "snmfProject file: file22a2345f1541.snmfProject "
#> [190] "project directory: /tmp/RtmpyN1xne/ "
#> [191] "snmf results directory: file22a2345f1541.snmf/ "
#> [192] "date of creation: 1749627446 "
#> [193] "input file: file22a2345f1541.geno "
#> [194] "number of individuals: 7 "
#> [195] "number of loci: 6 "
#> [196] "number of ancestral populations: 1 2 3 "
#> [197] ""
#> [198] "***** run *****"
#> [199] "snmf class"
#> [200] ""
#> [201] "file directory: K1/run1/ "
#> [202] "Q output file: file22a2345f1541_r1.1.Q "
#> [203] "G output file: file22a2345f1541_r1.1.G "
#> [204] "snmfClass file: file22a2345f1541_r1.1.snmfClass "
#> [205] "number of ancestral populations: 1 "
#> [206] "run number: 1 "
#> [207] "regularization parameter: 100 "
#> [208] "number of CPUs: 1 "
#> [209] "seed: 1 "
#> [210] "maximal number of iterations: 200 "
#> [211] "tolerance error: 1e-05 "
#> [212] "Q input file: "
#> [213] "cross-Entropy: 4.421509 "
#> [214] ""
#> [215] "***** run *****"
#> [216] "snmf class"
#> [217] ""
#> [218] "file directory: K2/run1/ "
#> [219] "Q output file: file22a2345f1541_r1.2.Q "
#> [220] "G output file: file22a2345f1541_r1.2.G "
#> [221] "snmfClass file: file22a2345f1541_r1.2.snmfClass "
#> [222] "number of ancestral populations: 2 "
#> [223] "run number: 1 "
#> [224] "regularization parameter: 100 "
#> [225] "number of CPUs: 1 "
#> [226] "seed: 1 "
#> [227] "maximal number of iterations: 200 "
#> [228] "tolerance error: 1e-05 "
#> [229] "Q input file: "
#> [230] "cross-Entropy: 4.793895 "
#> [231] ""
#> [232] "***** run *****"
#> [233] "snmf class"
#> [234] ""
#> [235] "file directory: K3/run1/ "
#> [236] "Q output file: file22a2345f1541_r1.3.Q "
#> [237] "G output file: file22a2345f1541_r1.3.G "
#> [238] "snmfClass file: file22a2345f1541_r1.3.snmfClass "
#> [239] "number of ancestral populations: 3 "
#> [240] "run number: 1 "
#> [241] "regularization parameter: 100 "
#> [242] "number of CPUs: 1 "
#> [243] "seed: 1 "
#> [244] "maximal number of iterations: 200 "
#> [245] "tolerance error: 1e-05 "
#> [246] "Q input file: "
#> [247] "cross-Entropy: 5.781592 "
#>
#> $loglik
#> numeric(0)
#>
#> $G
#> $G[[1]]
#> .Q1
#> [1,] 0.285714
#> [2,] 0.428571
#> [3,] 0.285714
#> [4,] 0.285714
#> [5,] 0.714286
#> [6,] 0.000100
#> [7,] 0.857143
#> [8,] 0.142857
#> [9,] 0.999900
#> [10,] 0.000100
#> [11,] 0.000100
#> [12,] 0.000100
#> [13,] 0.285714
#> [14,] 0.714286
#> [15,] 0.000100
#> [16,] 0.857143
#> [17,] 0.142857
#> [18,] 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $G[[2]]
#> .Q1 .Q2
#> [1,] 0.666578 0.749950
#> [2,] 0.333422 0.000100
#> [3,] 0.000100 0.250050
#> [4,] 0.333359 0.000100
#> [5,] 0.666641 0.999900
#> [6,] 0.000100 0.000100
#> [7,] 0.500021 0.249966
#> [8,] 0.000100 0.250034
#> [9,] 0.499979 0.500000
#> [10,] 0.000100 0.000100
#> [11,] 0.000100 0.000100
#> [12,] 0.000100 0.000100
#> [13,] 0.999900 0.500000
#> [14,] 0.000100 0.500000
#> [15,] 0.000100 0.000100
#> [16,] 0.333262 0.750059
#> [17,] 0.666738 0.249941
#> [18,] 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $G[[3]]
#> .Q1 .Q2 .Q3
#> [1,] 0.000100 0.000100 0.999900
#> [2,] 0.999900 0.333173 0.000100
#> [3,] 0.000100 0.666827 0.000100
#> [4,] 0.500116 0.000100 0.999900
#> [5,] 0.499884 0.999900 0.000100
#> [6,] 0.000100 0.000100 0.000100
#> [7,] 0.500014 0.500029 0.249862
#> [8,] 0.000100 0.000100 0.250138
#> [9,] 0.499986 0.499971 0.500000
#> [10,] 0.000100 0.000100 0.000100
#> [11,] 0.000100 0.000100 0.000100
#> [12,] 0.000100 0.000100 0.000100
#> [13,] 0.999900 0.000100 0.500000
#> [14,] 0.000100 0.999900 0.500000
#> [15,] 0.000100 0.000100 0.000100
#> [16,] 0.999900 0.999900 0.499724
#> [17,] 0.000100 0.000100 0.500276
#> [18,] 0.000100 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#>
#> $cv
#> [1] 4.42151 4.79389 5.78159
#>
#> $id
#> [1] "a" "b" "c" "d" "e" "f" "g"
#>
#> $algorithm
#> [1] "SNMF"
#>
#> attr(,"class")
#> [1] "gt_admix" "list"