Skip to content

Error Check that is.data.table(DT) == TRUE. #1

@stiatragul

Description

@stiatragul

Hey Carl,

I want to report an issue I found when running summarizeAlignments.

align.dir = "test_dir/"

align.summary <- summarizeAlignments(alignment.path = align.dir,
                                     alignment.format = "phylip",
                                     dataset.name = "Spheno_AHE",
                                     file.export = "Sphenomorphini/Spheno_AHE.csv",
                                     overwrite = TRUE)

I keep getting a data.table error:

Error: Check that is.data.table(DT) == TRUE. Otherwise, :=, `:=`(...) and let(...) are defined for use in j, once only and in particular ways. See help(":=").

However, when I redefined the function with the exact code you've written (without any modifications), it works. Note that I just changed summarize to summarise to test it out.

summariseAlignments<-function (alignment.path = NULL, file.export = NULL, overwrite = FALSE, 
                 dataset.name = NULL, alignment.format = c("phylip", "nexus")) 
{
  require(data.table)
  if (is.null(alignment.path) == TRUE) {
    stop("Error: no alignment path provided.")
  }
  if (is.null(dataset.name) == TRUE) {
    stop("Error: a dataset name is needed.")
  }
  if (dir.exists(alignment.path) == F) {
    return(paste0("Directory of alignments could not be found. Exiting."))
  }
  if (overwrite == TRUE) {
    if (file.exists(paste0(file.export, ".csv")) == T) {
      system(paste0("rm ", file.export, ".csv"))
    }
  }
  else {
    if (file.exists(paste0(file.export, ".csv")) == T) {
      print(paste0("File exists for ", file.export, " and overwrite = FALSE. Exiting."))
      save.data = read.csv(paste0(file.export, ".csv"))
      return(save.data)
    }
  }
  align.names = list.files(alignment.path)
  header.data = c("dataset", "file", "number_samples", "proportion_samples", 
                  "alignment_length", "count_pis", "proportion_pis", "count_missing_bp", 
                  "proportion_missing_bp")
  collect.data = data.table::data.table(matrix(as.numeric(0), 
                                               nrow = length(align.names), ncol = length(header.data)))
  data.table::setnames(collect.data, header.data)
  collect.data[, `:=`(file, as.character(file))]
  collect.data[, `:=`(dataset, as.character(dataset))]
  for (x in 1:length(align.names)) {
    if (alignment.format == "phylip") {
      align = ape::read.dna(paste0(alignment.path, "/", 
                                   align.names[x]), format = "sequential")
    }
    if (alignment.format == "nexus") {
      align = ape::read.nexus.data(paste0(alignment.path, 
                                          "/", align.names[x]))
      align = ape::as.DNAbin(matrix(unlist(align), ncol = length(align[[1]]), 
                                    byrow = TRUE))
    }
    data.table::set(collect.data, i = as.integer(x), j = match("dataset", 
                                                               header.data), value = dataset.name)
    data.table::set(collect.data, i = as.integer(x), j = match("file", 
                                                               header.data), value = align.names[x])
    data.table::set(collect.data, i = as.integer(x), j = match("number_samples", 
                                                               header.data), value = nrow(align))
    data.table::set(collect.data, i = as.integer(x), j = match("alignment_length", 
                                                               header.data), value = ncol(align))
    count.pis = PhyloConfigR::informativeSites(align, count = T, 
                                               ambiguities = T)
    prop.pis = round(count.pis/ncol(align), 3)
    data.table::set(collect.data, i = as.integer(x), j = match("count_pis", 
                                                               header.data), value = count.pis)
    data.table::set(collect.data, i = as.integer(x), j = match("proportion_pis", 
                                                               header.data), value = prop.pis)
    len.temp = as.character(as.list(align))
    len.loci = lapply(len.temp, function(x) x[x != "-"])
    len.loci = lapply(len.loci, function(x) x[x != "n"])
    len.loci = lapply(len.loci, function(x) x[x != "?"])
    spp.len = unlist(lapply(len.loci, function(x) length(x)))
    miss.total = (max(spp.len) - spp.len)
    miss.prop = round(sum(miss.total)/(max(spp.len) * nrow(align)), 
                      3)
    data.table::set(collect.data, i = as.integer(x), j = match("count_missing_bp", 
                                                               header.data), value = sum(miss.total))
    data.table::set(collect.data, i = as.integer(x), j = match("proportion_missing_bp", 
                                                               header.data), value = miss.prop)
  }
  save.data = collect.data[collect.data$file != 0, ]
  save.data[, `:=`(proportion_samples, round(number_samples/max(number_samples), 
                                             3))]
  if (is.null(file.export) != TRUE) {
    write.csv(save.data, file = paste0(file.export, ".csv"), 
              row.names = F)
  }
  return(save.data)
}
align.summary <- summariseAlignments(alignment.path = align.dir,
                                     alignment.format = "phylip",
                                     dataset.name = "Spheno_AHE",
                                     file.export = "Sphenomorphini/Spheno_AHE.csv",
                                     overwrite = TRUE)

The function works as expected. I'm not sure why, but thought I'd report it as an issue.

My session info:

> sessionInfo()
R version 4.4.0 (2024-04-24 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 10 x64 (build 19045)

Matrix products: default


locale:
[1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8    LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.utf8    

time zone: Australia/Sydney
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] BiocManager_1.30.22 data.table_1.15.4   ape_5.7-1           PhyloConfigR_0.2.0 

loaded via a namespace (and not attached):
 [1] miniUI_0.1.1.1    compiler_4.4.0    crayon_1.5.2      promises_1.2.1    Rcpp_1.0.12       stringr_1.5.1     parallel_4.4.0    callr_3.7.3      
 [9] later_1.3.1       fastmap_1.2.0     lattice_0.22-6    mime_0.12         R6_2.5.1          curl_5.1.0        htmlwidgets_1.6.4 desc_1.4.2       
[17] profvis_0.3.8     rprojroot_2.0.4   shiny_1.7.5.1     rlang_1.1.3       cachem_1.1.0      stringi_1.7.12    httpuv_1.6.12     fs_1.6.4         
[25] pkgload_1.3.3     memoise_2.0.1     cli_3.6.2         magrittr_2.0.3    ps_1.7.5          grid_4.4.0        digest_0.6.33     processx_3.8.2   
[33] rstudioapi_0.15.0 xtable_1.8-4      remotes_2.4.2.1   devtools_2.4.5    nlme_3.1-163      lifecycle_1.0.4   prettyunits_1.2.0 vctrs_0.6.4      
[41] glue_1.7.0        urlchecker_1.0.1  sessioninfo_1.2.2 pkgbuild_1.4.2    purrr_1.0.2       usethis_2.2.3     tools_4.4.0       ellipsis_0.3.2   
[49] htmltools_0.5.8.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions