Run SNMF from R in tidypopgen
Usage
gt_snmf(
x,
k,
project = "continue",
n_runs = 1,
alpha,
tolerance = 1e-05,
entropy = FALSE,
percentage = 0.05,
I,
iterations = 200,
ploidy = 2,
seed = -1
)
Arguments
- x
a gen_tibble or a character giving the path to the input geno file
- k
an integer, or a vector of integers (e.g. 1:3, as in the example below), giving the number of clusters
- project
one of "continue", "new", and "force": "continue" stores files in the current project, "new" creates a new project, and "force" stores results in the current project even if the .geno input file has been altered.
- n_runs
the number of runs for each k value (defaults to 1)
- alpha
numeric snmf regularization parameter. See LEA::snmf for details
- tolerance
numeric value of tolerance (default 0.00001)
- entropy
boolean indicating whether to estimate cross-entropy
- percentage
numeric value indicating percentage of masked genotypes, ranging between 0 and 1, to be used when entropy = TRUE
- I
number of SNPs for initialising the snmf algorithm
- iterations
an integer giving the maximum number of iterations (default 200)
- ploidy
the ploidy of the input data (defaults to 2)
- seed
the seed for the random number generator
Value
an object of class gt_admix consisting of a list with the following
elements:
- k: the number of clusters
- Q: a matrix with the admixture proportions
- P: a matrix with the allele frequencies
- log: a log of the output generated by SNMF (usually printed on the screen when running from the command line)
- cv: the masked cross-entropy (if entropy is TRUE)
- loglik: the log likelihood of the model
- id: the id column of the input gen_tibble (if applicable)
- group: the group column of the input gen_tibble (if applicable)
Details
This is a wrapper for LEA::snmf().
Examples
# run the example only if we have the package installed
example_gt <- load_example_gt("gen_tbl")
# To run SNMF on a gen_tibble:
example_gt %>% gt_snmf(
k = 1:3, project = "force", entropy = TRUE,
percentage = 0.5, n_runs = 1, seed = 1, alpha = 100
)
#> $k
#> [1] 1 2 3
#>
#> $Q
#> $Q[[1]]
#> .Q1
#> [1,] 1
#> [2,] 1
#> [3,] 1
#> [4,] 1
#> [5,] 1
#> [6,] 1
#> [7,] 1
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $Q[[2]]
#> .Q1 .Q2
#> [1,] 0.000274926 0.999725000
#> [2,] 0.000100000 0.999900000
#> [3,] 0.000100000 0.999900000
#> [4,] 0.999900000 0.000100000
#> [5,] 0.999742000 0.000257853
#> [6,] 0.999719000 0.000281284
#> [7,] 0.000317214 0.999683000
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $Q[[3]]
#> .Q1 .Q2 .Q3
#> [1,] 0.000240022 0.999520000 0.000240022
#> [2,] 0.000222869 0.999554000 0.000222869
#> [3,] 0.000222869 0.999554000 0.000222869
#> [4,] 0.999538000 0.000231010 0.000231010
#> [5,] 0.999800000 0.000099990 0.000099990
#> [6,] 0.000275505 0.000275505 0.999449000
#> [7,] 0.000275525 0.000275525 0.999449000
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#>
#> $P
#> list()
#>
#> $log
#> [1] "The project is saved into :"
#> [2] " "
#> [3] ""
#> [4] "To load the project, use:"
#> [5] " project = load.snmfProject(\"\")"
#> [6] ""
#> [7] "To remove the project, use:"
#> [8] " remove.snmfProject(\"\")"
#> [9] ""
#> [10] "[1] 1"
#> [11] "[1] \"*************************************\""
#> [12] "[1] \"* create.dataset *\""
#> [13] "[1] \"*************************************\""
#> [14] "summary of the options:"
#> [15] ""
#> [16] " -n (number of individuals) 7"
#> [17] " -L (number of loci) 6"
#> [18] " -s (seed random init) 1"
#> [19] " -r (percentage of masked data) 0.5"
#> [20] " -x (genotype file in .geno format) /tmp/RtmpqV1Lku/file23b4330868e7.geno"
#> [21] " -o (output file in .geno format) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [22] ""
#> [23] " Write genotype file with masked data, /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno:\t\tOK."
#> [24] ""
#> [25] "[1] \"*************************************\""
#> [26] "[1] \"* sNMF K = 1 repetition 1 *\""
#> [27] "[1] \"*************************************\""
#> [28] "summary of the options:"
#> [29] ""
#> [30] " -n (number of individuals) 7"
#> [31] " -L (number of loci) 6"
#> [32] " -K (number of ancestral pops) 1"
#> [33] " -x (input file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [34] " -q (individual admixture file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.Q"
#> [35] " -g (ancestral frequencies file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.G"
#> [36] " -i (number max of iterations) 200"
#> [37] " -a (regularization parameter) 100"
#> [38] " -s (seed random init) 93857920319489"
#> [39] " -e (tolerance error) 1E-05"
#> [40] " -p (number of processes) 1"
#> [41] " - diploid"
#> [42] ""
#> [43] "Read genotype file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno:\t\tOK."
#> [44] ""
#> [45] ""
#> [46] "Main algorithm:"
#> [47] ""
#> [48] "Least-square error: 13.714286"
#> [49] "Write individual ancestry coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.Q:\t\tOK."
#> [50] "Write ancestral allele frequency coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.G:\tOK."
#> [51] ""
#> [52] "[1] \"*************************************\""
#> [53] "[1] \"* cross-entropy estimation *\""
#> [54] "[1] \"*************************************\""
#> [55] "summary of the options:"
#> [56] ""
#> [57] " -n (number of individuals) 7"
#> [58] " -L (number of loci) 6"
#> [59] " -K (number of ancestral pops) 1"
#> [60] " -x (genotype file) /tmp/RtmpqV1Lku/file23b4330868e7.geno"
#> [61] " -q (individual admixture) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.Q"
#> [62] " -g (ancestral frequencies) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K1/run1/file23b4330868e7_r1.1.G"
#> [63] " -i (with masked genotypes) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [64] " - diploid"
#> [65] ""
#> [66] "Cross-Entropy (all data):\t 0.706731"
#> [67] "Cross-Entropy (masked data):\t 4.42151"
#> [68] "The project is saved into :"
#> [69] " "
#> [70] ""
#> [71] "To load the project, use:"
#> [72] " project = load.snmfProject(\"\")"
#> [73] ""
#> [74] "To remove the project, use:"
#> [75] " remove.snmfProject(\"\")"
#> [76] ""
#> [77] "[1] \"*************************************\""
#> [78] "[1] \"* sNMF K = 2 repetition 1 *\""
#> [79] "[1] \"*************************************\""
#> [80] "summary of the options:"
#> [81] ""
#> [82] " -n (number of individuals) 7"
#> [83] " -L (number of loci) 6"
#> [84] " -K (number of ancestral pops) 2"
#> [85] " -x (input file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [86] " -q (individual admixture file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.Q"
#> [87] " -g (ancestral frequencies file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.G"
#> [88] " -i (number max of iterations) 200"
#> [89] " -a (regularization parameter) 100"
#> [90] " -s (seed random init) 93857920319489"
#> [91] " -e (tolerance error) 1E-05"
#> [92] " -p (number of processes) 1"
#> [93] " - diploid"
#> [94] ""
#> [95] "Read genotype file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno:\t\tOK."
#> [96] ""
#> [97] ""
#> [98] "Main algorithm:"
#> [99] "\t[ ]"
#> [100] "\t[======]"
#> [101] "Number of iterations: 16"
#> [102] ""
#> [103] "Least-square error: 14.000528"
#> [104] "Write individual ancestry coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.Q:\t\tOK."
#> [105] "Write ancestral allele frequency coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.G:\tOK."
#> [106] ""
#> [107] "[1] \"*************************************\""
#> [108] "[1] \"* cross-entropy estimation *\""
#> [109] "[1] \"*************************************\""
#> [110] "summary of the options:"
#> [111] ""
#> [112] " -n (number of individuals) 7"
#> [113] " -L (number of loci) 6"
#> [114] " -K (number of ancestral pops) 2"
#> [115] " -x (genotype file) /tmp/RtmpqV1Lku/file23b4330868e7.geno"
#> [116] " -q (individual admixture) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.Q"
#> [117] " -g (ancestral frequencies) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K2/run1/file23b4330868e7_r1.2.G"
#> [118] " -i (with masked genotypes) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [119] " - diploid"
#> [120] ""
#> [121] "Cross-Entropy (all data):\t 0.69943"
#> [122] "Cross-Entropy (masked data):\t 4.79389"
#> [123] "The project is saved into :"
#> [124] " "
#> [125] ""
#> [126] "To load the project, use:"
#> [127] " project = load.snmfProject(\"\")"
#> [128] ""
#> [129] "To remove the project, use:"
#> [130] " remove.snmfProject(\"\")"
#> [131] ""
#> [132] "[1] \"*************************************\""
#> [133] "[1] \"* sNMF K = 3 repetition 1 *\""
#> [134] "[1] \"*************************************\""
#> [135] "summary of the options:"
#> [136] ""
#> [137] " -n (number of individuals) 7"
#> [138] " -L (number of loci) 6"
#> [139] " -K (number of ancestral pops) 3"
#> [140] " -x (input file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [141] " -q (individual admixture file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.Q"
#> [142] " -g (ancestral frequencies file) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.G"
#> [143] " -i (number max of iterations) 200"
#> [144] " -a (regularization parameter) 100"
#> [145] " -s (seed random init) 93857920319489"
#> [146] " -e (tolerance error) 1E-05"
#> [147] " -p (number of processes) 1"
#> [148] " - diploid"
#> [149] ""
#> [150] "Read genotype file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno:\t\tOK."
#> [151] ""
#> [152] ""
#> [153] "Main algorithm:"
#> [154] "\t[ ]"
#> [155] "\t[======]"
#> [156] "Number of iterations: 16"
#> [157] ""
#> [158] "Least-square error: 8.583557"
#> [159] "Write individual ancestry coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.Q:\t\tOK."
#> [160] "Write ancestral allele frequency coefficient file /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.G:\tOK."
#> [161] ""
#> [162] "[1] \"*************************************\""
#> [163] "[1] \"* cross-entropy estimation *\""
#> [164] "[1] \"*************************************\""
#> [165] "summary of the options:"
#> [166] ""
#> [167] " -n (number of individuals) 7"
#> [168] " -L (number of loci) 6"
#> [169] " -K (number of ancestral pops) 3"
#> [170] " -x (genotype file) /tmp/RtmpqV1Lku/file23b4330868e7.geno"
#> [171] " -q (individual admixture) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.Q"
#> [172] " -g (ancestral frequencies) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/K3/run1/file23b4330868e7_r1.3.G"
#> [173] " -i (with masked genotypes) /tmp/RtmpqV1Lku/file23b4330868e7.snmf/masked/file23b4330868e7_I.geno"
#> [174] " - diploid"
#> [175] ""
#> [176] "Cross-Entropy (all data):\t 0.349945"
#> [177] "Cross-Entropy (masked data):\t 5.78159"
#> [178] "The project is saved into :"
#> [179] " "
#> [180] ""
#> [181] "To load the project, use:"
#> [182] " project = load.snmfProject(\"\")"
#> [183] ""
#> [184] "To remove the project, use:"
#> [185] " remove.snmfProject(\"\")"
#> [186] ""
#> [187] "snmf Project"
#> [188] ""
#> [189] "snmfProject file: file23b4330868e7.snmfProject "
#> [190] "project directory: /tmp/RtmpqV1Lku/ "
#> [191] "snmf results directory: file23b4330868e7.snmf/ "
#> [192] "date of creation: 1761412839 "
#> [193] "input file: file23b4330868e7.geno "
#> [194] "number of individuals: 7 "
#> [195] "number of loci: 6 "
#> [196] "number of ancestral populations: 1 2 3 "
#> [197] ""
#> [198] "***** run *****"
#> [199] "snmf class"
#> [200] ""
#> [201] "file directory: K1/run1/ "
#> [202] "Q output file: file23b4330868e7_r1.1.Q "
#> [203] "G output file: file23b4330868e7_r1.1.G "
#> [204] "snmfClass file: file23b4330868e7_r1.1.snmfClass "
#> [205] "number of ancestral populations: 1 "
#> [206] "run number: 1 "
#> [207] "regularization parameter: 100 "
#> [208] "number of CPUs: 1 "
#> [209] "seed: 1 "
#> [210] "maximal number of iterations: 200 "
#> [211] "tolerance error: 1e-05 "
#> [212] "Q input file: "
#> [213] "cross-Entropy: 4.421509 "
#> [214] ""
#> [215] "***** run *****"
#> [216] "snmf class"
#> [217] ""
#> [218] "file directory: K2/run1/ "
#> [219] "Q output file: file23b4330868e7_r1.2.Q "
#> [220] "G output file: file23b4330868e7_r1.2.G "
#> [221] "snmfClass file: file23b4330868e7_r1.2.snmfClass "
#> [222] "number of ancestral populations: 2 "
#> [223] "run number: 1 "
#> [224] "regularization parameter: 100 "
#> [225] "number of CPUs: 1 "
#> [226] "seed: 1 "
#> [227] "maximal number of iterations: 200 "
#> [228] "tolerance error: 1e-05 "
#> [229] "Q input file: "
#> [230] "cross-Entropy: 4.793895 "
#> [231] ""
#> [232] "***** run *****"
#> [233] "snmf class"
#> [234] ""
#> [235] "file directory: K3/run1/ "
#> [236] "Q output file: file23b4330868e7_r1.3.Q "
#> [237] "G output file: file23b4330868e7_r1.3.G "
#> [238] "snmfClass file: file23b4330868e7_r1.3.snmfClass "
#> [239] "number of ancestral populations: 3 "
#> [240] "run number: 1 "
#> [241] "regularization parameter: 100 "
#> [242] "number of CPUs: 1 "
#> [243] "seed: 1 "
#> [244] "maximal number of iterations: 200 "
#> [245] "tolerance error: 1e-05 "
#> [246] "Q input file: "
#> [247] "cross-Entropy: 5.781592 "
#>
#> $loglik
#> numeric(0)
#>
#> $G
#> $G[[1]]
#> .Q1
#> [1,] 0.285714
#> [2,] 0.428571
#> [3,] 0.285714
#> [4,] 0.285714
#> [5,] 0.714286
#> [6,] 0.000100
#> [7,] 0.857143
#> [8,] 0.142857
#> [9,] 0.999900
#> [10,] 0.000100
#> [11,] 0.000100
#> [12,] 0.000100
#> [13,] 0.285714
#> [14,] 0.714286
#> [15,] 0.000100
#> [16,] 0.857143
#> [17,] 0.142857
#> [18,] 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $G[[2]]
#> .Q1 .Q2
#> [1,] 0.666578 0.749950
#> [2,] 0.333422 0.000100
#> [3,] 0.000100 0.250050
#> [4,] 0.333359 0.000100
#> [5,] 0.666641 0.999900
#> [6,] 0.000100 0.000100
#> [7,] 0.500021 0.249966
#> [8,] 0.000100 0.250034
#> [9,] 0.499979 0.500000
#> [10,] 0.000100 0.000100
#> [11,] 0.000100 0.000100
#> [12,] 0.000100 0.000100
#> [13,] 0.999900 0.500000
#> [14,] 0.000100 0.500000
#> [15,] 0.000100 0.000100
#> [16,] 0.333262 0.750059
#> [17,] 0.666738 0.249941
#> [18,] 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#> $G[[3]]
#> .Q1 .Q2 .Q3
#> [1,] 0.000100 0.000100 0.999900
#> [2,] 0.999900 0.333173 0.000100
#> [3,] 0.000100 0.666827 0.000100
#> [4,] 0.500116 0.000100 0.999900
#> [5,] 0.499884 0.999900 0.000100
#> [6,] 0.000100 0.000100 0.000100
#> [7,] 0.500014 0.500029 0.249862
#> [8,] 0.000100 0.000100 0.250138
#> [9,] 0.499986 0.499971 0.500000
#> [10,] 0.000100 0.000100 0.000100
#> [11,] 0.000100 0.000100 0.000100
#> [12,] 0.000100 0.000100 0.000100
#> [13,] 0.999900 0.000100 0.500000
#> [14,] 0.000100 0.999900 0.500000
#> [15,] 0.000100 0.000100 0.000100
#> [16,] 0.999900 0.999900 0.499724
#> [17,] 0.000100 0.000100 0.500276
#> [18,] 0.000100 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix" "array"
#>
#>
#> $cv
#> [1] 4.42151 4.79389 5.78159
#>
#> $id
#> [1] "a" "b" "c" "d" "e" "f" "g"
#>
#> $algorithm
#> [1] "SNMF"
#>
#> attr(,"class")
#> [1] "gt_admix" "list"
