Skip to contents

Run SNMF from R in tidypopgen

Usage

gt_snmf(
  x,
  k,
  project = "continue",
  n_runs = 1,
  alpha,
  tolerance = 1e-05,
  entropy = FALSE,
  percentage = 0.05,
  I,
  iterations = 200,
  ploidy = 2,
  seed = -1
)

Arguments

x

a gen_tibble or a character giving the path to the input geno file

k

an integer, or a vector of integers (e.g. 1:3), giving the number(s) of clusters to fit

project

one of "continue", "new", and "force": "continue" stores files in the current project, "new" creates a new project, and "force" stores results in the current project even if the .geno input file has been altered.

n_runs

the number of runs for each k value (defaults to 1)

alpha

numeric snmf regularization parameter. See LEA::snmf for details

tolerance

numeric value of tolerance (default 0.00001)

entropy

boolean indicating whether to estimate cross-entropy

percentage

numeric value between 0 and 1 giving the proportion of genotypes to mask; only used when entropy = TRUE

I

number of SNPs for initialising the snmf algorithm

iterations

integer giving the maximum number of iterations (default 200)

ploidy

the ploidy of the input data (defaults to 2)

seed

the seed for the random number generator

Value

an object of class gt_admix consisting of a list with the following elements:

  • k the number of clusters

  • Q a matrix with the admixture proportions

  • P a matrix with the allele frequencies

  • log a log of the output generated by snmf (usually printed on the screen while it runs)

  • cv the masked cross-entropy (if entropy is TRUE)

  • loglik the log likelihood of the model

  • id the id column of the input gen_tibble (if applicable)

  • group the group column of the input gen_tibble (if applicable)

Details

This is a wrapper for the function snmf from the R package LEA.

Examples

example_gt <- example_gt("gen_tbl")

# To run SNMF on a gen_tibble:
example_gt %>% gt_snmf(
  k = 1:3, project = "force", entropy = TRUE,
  percentage = 0.5, n_runs = 1, seed = 1, alpha = 100
)
#> $k
#> [1] 1 2 3
#> 
#> $Q
#> $Q[[1]]
#>      .Q1
#> [1,]   1
#> [2,]   1
#> [3,]   1
#> [4,]   1
#> [5,]   1
#> [6,]   1
#> [7,]   1
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> $Q[[2]]
#>              .Q1         .Q2
#> [1,] 0.000274926 0.999725000
#> [2,] 0.000100000 0.999900000
#> [3,] 0.000100000 0.999900000
#> [4,] 0.999900000 0.000100000
#> [5,] 0.999742000 0.000257853
#> [6,] 0.999719000 0.000281284
#> [7,] 0.000317214 0.999683000
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> $Q[[3]]
#>              .Q1         .Q2         .Q3
#> [1,] 0.000240022 0.999520000 0.000240022
#> [2,] 0.000222869 0.999554000 0.000222869
#> [3,] 0.000222869 0.999554000 0.000222869
#> [4,] 0.999538000 0.000231010 0.000231010
#> [5,] 0.999800000 0.000099990 0.000099990
#> [6,] 0.000275505 0.000275505 0.999449000
#> [7,] 0.000275525 0.000275525 0.999449000
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> 
#> $P
#> list()
#> 
#> $log
#>   [1] "The project is saved into :"                                                                                                  
#>   [2] "  "                                                                                                                           
#>   [3] ""                                                                                                                             
#>   [4] "To load the project, use:"                                                                                                    
#>   [5] " project = load.snmfProject(\"\")"                                                                                            
#>   [6] ""                                                                                                                             
#>   [7] "To remove the project, use:"                                                                                                  
#>   [8] " remove.snmfProject(\"\")"                                                                                                    
#>   [9] ""                                                                                                                             
#>  [10] "[1] 1"                                                                                                                        
#>  [11] "[1] \"*************************************\""                                                                                
#>  [12] "[1] \"*          create.dataset            *\""                                                                               
#>  [13] "[1] \"*************************************\""                                                                                
#>  [14] "summary of the options:"                                                                                                      
#>  [15] ""                                                                                                                             
#>  [16] "        -n (number of individuals)                 7"                                                                         
#>  [17] "        -L (number of loci)                        6"                                                                         
#>  [18] "        -s (seed random init)                      1"                                                                         
#>  [19] "        -r (percentage of masked data)             0.5"                                                                       
#>  [20] "        -x (genotype file in .geno format)         /tmp/RtmpyN1xne/file22a2345f1541.geno"                                     
#>  [21] "        -o (output file in .geno format)           /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"      
#>  [22] ""                                                                                                                             
#>  [23] " Write genotype file with masked data, /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."          
#>  [24] ""                                                                                                                             
#>  [25] "[1] \"*************************************\""                                                                                
#>  [26] "[1] \"* sNMF K = 1  repetition 1      *\""                                                                                    
#>  [27] "[1] \"*************************************\""                                                                                
#>  [28] "summary of the options:"                                                                                                      
#>  [29] ""                                                                                                                             
#>  [30] "        -n (number of individuals)             7"                                                                             
#>  [31] "        -L (number of loci)                    6"                                                                             
#>  [32] "        -K (number of ancestral pops)          1"                                                                             
#>  [33] "        -x (input file)                        /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"          
#>  [34] "        -q (individual admixture file)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q"         
#>  [35] "        -g (ancestral frequencies file)        /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G"         
#>  [36] "        -i (number max of iterations)          200"                                                                           
#>  [37] "        -a (regularization parameter)          100"                                                                           
#>  [38] "        -s (seed random init)                  94308891885569"                                                                
#>  [39] "        -e (tolerance error)                   1E-05"                                                                         
#>  [40] "        -p (number of processes)               1"                                                                             
#>  [41] "        - diploid"                                                                                                            
#>  [42] ""                                                                                                                             
#>  [43] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."                              
#>  [44] ""                                                                                                                             
#>  [45] ""                                                                                                                             
#>  [46] "Main algorithm:"                                                                                                              
#>  [47] ""                                                                                                                             
#>  [48] "Least-square error: 13.714286"                                                                                                
#>  [49] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q:\t\tOK."     
#>  [50] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G:\tOK."
#>  [51] ""                                                                                                                             
#>  [52] "[1] \"*************************************\""                                                                                
#>  [53] "[1] \"*    cross-entropy estimation       *\""                                                                                
#>  [54] "[1] \"*************************************\""                                                                                
#>  [55] "summary of the options:"                                                                                                      
#>  [56] ""                                                                                                                             
#>  [57] "        -n (number of individuals)         7"                                                                                 
#>  [58] "        -L (number of loci)                6"                                                                                 
#>  [59] "        -K (number of ancestral pops)      1"                                                                                 
#>  [60] "        -x (genotype file)                 /tmp/RtmpyN1xne/file22a2345f1541.geno"                                             
#>  [61] "        -q (individual admixture)          /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.Q"             
#>  [62] "        -g (ancestral frequencies)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K1/run1/file22a2345f1541_r1.1.G"             
#>  [63] "        -i (with masked genotypes)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"              
#>  [64] "        - diploid"                                                                                                            
#>  [65] ""                                                                                                                             
#>  [66] "Cross-Entropy (all data):\t 0.706731"                                                                                         
#>  [67] "Cross-Entropy (masked data):\t 4.42151"                                                                                       
#>  [68] "The project is saved into :"                                                                                                  
#>  [69] "  "                                                                                                                           
#>  [70] ""                                                                                                                             
#>  [71] "To load the project, use:"                                                                                                    
#>  [72] " project = load.snmfProject(\"\")"                                                                                            
#>  [73] ""                                                                                                                             
#>  [74] "To remove the project, use:"                                                                                                  
#>  [75] " remove.snmfProject(\"\")"                                                                                                    
#>  [76] ""                                                                                                                             
#>  [77] "[1] \"*************************************\""                                                                                
#>  [78] "[1] \"* sNMF K = 2  repetition 1      *\""                                                                                    
#>  [79] "[1] \"*************************************\""                                                                                
#>  [80] "summary of the options:"                                                                                                      
#>  [81] ""                                                                                                                             
#>  [82] "        -n (number of individuals)             7"                                                                             
#>  [83] "        -L (number of loci)                    6"                                                                             
#>  [84] "        -K (number of ancestral pops)          2"                                                                             
#>  [85] "        -x (input file)                        /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"          
#>  [86] "        -q (individual admixture file)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q"         
#>  [87] "        -g (ancestral frequencies file)        /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G"         
#>  [88] "        -i (number max of iterations)          200"                                                                           
#>  [89] "        -a (regularization parameter)          100"                                                                           
#>  [90] "        -s (seed random init)                  8589934593"                                                                    
#>  [91] "        -e (tolerance error)                   1E-05"                                                                         
#>  [92] "        -p (number of processes)               1"                                                                             
#>  [93] "        - diploid"                                                                                                            
#>  [94] ""                                                                                                                             
#>  [95] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."                              
#>  [96] ""                                                                                                                             
#>  [97] ""                                                                                                                             
#>  [98] "Main algorithm:"                                                                                                              
#>  [99] "\t[                                                                           ]"                                              
#> [100] "\t[======]"                                                                                                                   
#> [101] "Number of iterations: 16"                                                                                                     
#> [102] ""                                                                                                                             
#> [103] "Least-square error: 14.000528"                                                                                                
#> [104] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q:\t\tOK."     
#> [105] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G:\tOK."
#> [106] ""                                                                                                                             
#> [107] "[1] \"*************************************\""                                                                                
#> [108] "[1] \"*    cross-entropy estimation       *\""                                                                                
#> [109] "[1] \"*************************************\""                                                                                
#> [110] "summary of the options:"                                                                                                      
#> [111] ""                                                                                                                             
#> [112] "        -n (number of individuals)         7"                                                                                 
#> [113] "        -L (number of loci)                6"                                                                                 
#> [114] "        -K (number of ancestral pops)      2"                                                                                 
#> [115] "        -x (genotype file)                 /tmp/RtmpyN1xne/file22a2345f1541.geno"                                             
#> [116] "        -q (individual admixture)          /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.Q"             
#> [117] "        -g (ancestral frequencies)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K2/run1/file22a2345f1541_r1.2.G"             
#> [118] "        -i (with masked genotypes)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"              
#> [119] "        - diploid"                                                                                                            
#> [120] ""                                                                                                                             
#> [121] "Cross-Entropy (all data):\t 0.69943"                                                                                          
#> [122] "Cross-Entropy (masked data):\t 4.79389"                                                                                       
#> [123] "The project is saved into :"                                                                                                  
#> [124] "  "                                                                                                                           
#> [125] ""                                                                                                                             
#> [126] "To load the project, use:"                                                                                                    
#> [127] " project = load.snmfProject(\"\")"                                                                                            
#> [128] ""                                                                                                                             
#> [129] "To remove the project, use:"                                                                                                  
#> [130] " remove.snmfProject(\"\")"                                                                                                    
#> [131] ""                                                                                                                             
#> [132] "[1] \"*************************************\""                                                                                
#> [133] "[1] \"* sNMF K = 3  repetition 1      *\""                                                                                    
#> [134] "[1] \"*************************************\""                                                                                
#> [135] "summary of the options:"                                                                                                      
#> [136] ""                                                                                                                             
#> [137] "        -n (number of individuals)             7"                                                                             
#> [138] "        -L (number of loci)                    6"                                                                             
#> [139] "        -K (number of ancestral pops)          3"                                                                             
#> [140] "        -x (input file)                        /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"          
#> [141] "        -q (individual admixture file)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q"         
#> [142] "        -g (ancestral frequencies file)        /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G"         
#> [143] "        -i (number max of iterations)          200"                                                                           
#> [144] "        -a (regularization parameter)          100"                                                                           
#> [145] "        -s (seed random init)                  94308891885569"                                                                
#> [146] "        -e (tolerance error)                   1E-05"                                                                         
#> [147] "        -p (number of processes)               1"                                                                             
#> [148] "        - diploid"                                                                                                            
#> [149] ""                                                                                                                             
#> [150] "Read genotype file /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno:\t\tOK."                              
#> [151] ""                                                                                                                             
#> [152] ""                                                                                                                             
#> [153] "Main algorithm:"                                                                                                              
#> [154] "\t[                                                                           ]"                                              
#> [155] "\t[======]"                                                                                                                   
#> [156] "Number of iterations: 16"                                                                                                     
#> [157] ""                                                                                                                             
#> [158] "Least-square error: 8.583557"                                                                                                 
#> [159] "Write individual ancestry coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q:\t\tOK."     
#> [160] "Write ancestral allele frequency coefficient file /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G:\tOK."
#> [161] ""                                                                                                                             
#> [162] "[1] \"*************************************\""                                                                                
#> [163] "[1] \"*    cross-entropy estimation       *\""                                                                                
#> [164] "[1] \"*************************************\""                                                                                
#> [165] "summary of the options:"                                                                                                      
#> [166] ""                                                                                                                             
#> [167] "        -n (number of individuals)         7"                                                                                 
#> [168] "        -L (number of loci)                6"                                                                                 
#> [169] "        -K (number of ancestral pops)      3"                                                                                 
#> [170] "        -x (genotype file)                 /tmp/RtmpyN1xne/file22a2345f1541.geno"                                             
#> [171] "        -q (individual admixture)          /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.Q"             
#> [172] "        -g (ancestral frequencies)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/K3/run1/file22a2345f1541_r1.3.G"             
#> [173] "        -i (with masked genotypes)         /tmp/RtmpyN1xne/file22a2345f1541.snmf/masked/file22a2345f1541_I.geno"              
#> [174] "        - diploid"                                                                                                            
#> [175] ""                                                                                                                             
#> [176] "Cross-Entropy (all data):\t 0.349945"                                                                                         
#> [177] "Cross-Entropy (masked data):\t 5.78159"                                                                                       
#> [178] "The project is saved into :"                                                                                                  
#> [179] "  "                                                                                                                           
#> [180] ""                                                                                                                             
#> [181] "To load the project, use:"                                                                                                    
#> [182] " project = load.snmfProject(\"\")"                                                                                            
#> [183] ""                                                                                                                             
#> [184] "To remove the project, use:"                                                                                                  
#> [185] " remove.snmfProject(\"\")"                                                                                                    
#> [186] ""                                                                                                                             
#> [187] "snmf Project"                                                                                                                 
#> [188] ""                                                                                                                             
#> [189] "snmfProject file:                 file22a2345f1541.snmfProject "                                                              
#> [190] "project directory:                /tmp/RtmpyN1xne/ "                                                                          
#> [191] "snmf results directory:           file22a2345f1541.snmf/ "                                                                    
#> [192] "date of creation:                 1749627446 "                                                                                
#> [193] "input file:                       file22a2345f1541.geno "                                                                     
#> [194] "number of individuals:            7 "                                                                                         
#> [195] "number of loci:                   6 "                                                                                         
#> [196] "number of ancestral populations:  1 2 3 "                                                                                     
#> [197] ""                                                                                                                             
#> [198] "***** run *****"                                                                                                              
#> [199] "snmf class"                                                                                                                   
#> [200] ""                                                                                                                             
#> [201] "file directory:                   K1/run1/ "                                                                                  
#> [202] "Q output file:                    file22a2345f1541_r1.1.Q "                                                                   
#> [203] "G output file:                    file22a2345f1541_r1.1.G "                                                                   
#> [204] "snmfClass file:                   file22a2345f1541_r1.1.snmfClass "                                                           
#> [205] "number of ancestral populations:  1 "                                                                                         
#> [206] "run number:                       1 "                                                                                         
#> [207] "regularization parameter:         100 "                                                                                       
#> [208] "number of CPUs:                   1 "                                                                                         
#> [209] "seed:                             1 "                                                                                         
#> [210] "maximal number of iterations:     200 "                                                                                       
#> [211] "tolerance error:                  1e-05 "                                                                                     
#> [212] "Q input file:                      "                                                                                          
#> [213] "cross-Entropy:                    4.421509 "                                                                                  
#> [214] ""                                                                                                                             
#> [215] "***** run *****"                                                                                                              
#> [216] "snmf class"                                                                                                                   
#> [217] ""                                                                                                                             
#> [218] "file directory:                   K2/run1/ "                                                                                  
#> [219] "Q output file:                    file22a2345f1541_r1.2.Q "                                                                   
#> [220] "G output file:                    file22a2345f1541_r1.2.G "                                                                   
#> [221] "snmfClass file:                   file22a2345f1541_r1.2.snmfClass "                                                           
#> [222] "number of ancestral populations:  2 "                                                                                         
#> [223] "run number:                       1 "                                                                                         
#> [224] "regularization parameter:         100 "                                                                                       
#> [225] "number of CPUs:                   1 "                                                                                         
#> [226] "seed:                             1 "                                                                                         
#> [227] "maximal number of iterations:     200 "                                                                                       
#> [228] "tolerance error:                  1e-05 "                                                                                     
#> [229] "Q input file:                      "                                                                                          
#> [230] "cross-Entropy:                    4.793895 "                                                                                  
#> [231] ""                                                                                                                             
#> [232] "***** run *****"                                                                                                              
#> [233] "snmf class"                                                                                                                   
#> [234] ""                                                                                                                             
#> [235] "file directory:                   K3/run1/ "                                                                                  
#> [236] "Q output file:                    file22a2345f1541_r1.3.Q "                                                                   
#> [237] "G output file:                    file22a2345f1541_r1.3.G "                                                                   
#> [238] "snmfClass file:                   file22a2345f1541_r1.3.snmfClass "                                                           
#> [239] "number of ancestral populations:  3 "                                                                                         
#> [240] "run number:                       1 "                                                                                         
#> [241] "regularization parameter:         100 "                                                                                       
#> [242] "number of CPUs:                   1 "                                                                                         
#> [243] "seed:                             1 "                                                                                         
#> [244] "maximal number of iterations:     200 "                                                                                       
#> [245] "tolerance error:                  1e-05 "                                                                                     
#> [246] "Q input file:                      "                                                                                          
#> [247] "cross-Entropy:                    5.781592 "                                                                                  
#> 
#> $loglik
#> numeric(0)
#> 
#> $G
#> $G[[1]]
#>            .Q1
#>  [1,] 0.285714
#>  [2,] 0.428571
#>  [3,] 0.285714
#>  [4,] 0.285714
#>  [5,] 0.714286
#>  [6,] 0.000100
#>  [7,] 0.857143
#>  [8,] 0.142857
#>  [9,] 0.999900
#> [10,] 0.000100
#> [11,] 0.000100
#> [12,] 0.000100
#> [13,] 0.285714
#> [14,] 0.714286
#> [15,] 0.000100
#> [16,] 0.857143
#> [17,] 0.142857
#> [18,] 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> $G[[2]]
#>            .Q1      .Q2
#>  [1,] 0.666578 0.749950
#>  [2,] 0.333422 0.000100
#>  [3,] 0.000100 0.250050
#>  [4,] 0.333359 0.000100
#>  [5,] 0.666641 0.999900
#>  [6,] 0.000100 0.000100
#>  [7,] 0.500021 0.249966
#>  [8,] 0.000100 0.250034
#>  [9,] 0.499979 0.500000
#> [10,] 0.000100 0.000100
#> [11,] 0.000100 0.000100
#> [12,] 0.000100 0.000100
#> [13,] 0.999900 0.500000
#> [14,] 0.000100 0.500000
#> [15,] 0.000100 0.000100
#> [16,] 0.333262 0.750059
#> [17,] 0.666738 0.249941
#> [18,] 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> $G[[3]]
#>            .Q1      .Q2      .Q3
#>  [1,] 0.000100 0.000100 0.999900
#>  [2,] 0.999900 0.333173 0.000100
#>  [3,] 0.000100 0.666827 0.000100
#>  [4,] 0.500116 0.000100 0.999900
#>  [5,] 0.499884 0.999900 0.000100
#>  [6,] 0.000100 0.000100 0.000100
#>  [7,] 0.500014 0.500029 0.249862
#>  [8,] 0.000100 0.000100 0.250138
#>  [9,] 0.499986 0.499971 0.500000
#> [10,] 0.000100 0.000100 0.000100
#> [11,] 0.000100 0.000100 0.000100
#> [12,] 0.000100 0.000100 0.000100
#> [13,] 0.999900 0.000100 0.500000
#> [14,] 0.000100 0.999900 0.500000
#> [15,] 0.000100 0.000100 0.000100
#> [16,] 0.999900 0.999900 0.499724
#> [17,] 0.000100 0.000100 0.500276
#> [18,] 0.000100 0.000100 0.000100
#> attr(,"class")
#> [1] "q_matrix" "matrix"   "array"   
#> 
#> 
#> $cv
#> [1] 4.42151 4.79389 5.78159
#> 
#> $id
#> [1] "a" "b" "c" "d" "e" "f" "g"
#> 
#> $algorithm
#> [1] "SNMF"
#> 
#> attr(,"class")
#> [1] "gt_admix" "list"