Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Resources/dqd-omop-5.4/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
FROM r-base:4.4.3

# System dependencies in one layer: Java (required by rJava / DatabaseConnector
# JDBC drivers), curl/xml2 + dev headers (required by the R 'curl' and XML
# packages), and python3 (runs server.py).
RUN apt-get update && \
    apt-get install -y default-jre default-jdk curl xml2 \
        libcurl4-openssl-dev libxml2-dev python3 && \
    apt-get clean

# rJava needs libjvm on the loader path. ENV persists for every later layer
# AND at container runtime, unlike a per-RUN `export` (which the original
# used, and which additionally contained the typo "jashiva-21-openjdk-amd64").
ENV LD_LIBRARY_PATH=/usr/lib/jvm/java-21-openjdk-amd64/lib/server/

# Fail the build early if the JRE is broken.
RUN java --version

# OHDSI packages from GitHub.
RUN echo 'install.packages("remotes")' | r
RUN echo 'remotes::install_github("OHDSI/DataQualityDashboard")' | r
RUN echo 'remotes::install_github("OHDSI/DbDiagnostics")' | r
RUN echo 'remotes::install_github("OHDSI/DatabaseConnector")' | r

# CRAN dependencies (batched to keep the layer count down).
RUN echo 'install.packages(c("ParallelLogger", "DBI", "rJava", "RJDBC"))' | r
RUN echo 'install.packages(c("Achilles", "bit", "curl", "TTR", "quantmod", "tseries", "RSQLite"))' | r

# Smoke-test that the key libraries actually load; library() errors (unlike
# require()) so a missing/broken package fails the build here.
RUN echo 'library("rJava"); library("Achilles"); library("bit"); library("curl"); library("TTR"); library("quantmod"); library("tseries")' | r

# Safety net: reinstall anything that still fails to load. NOTE: the original
# checked require("OHDSI/DatabaseConnector"), but require() takes a package
# name, not a GitHub slug, so that check could never succeed.
RUN echo 'if (!require("DbDiagnostics")){remotes::install_github("OHDSI/DbDiagnostics")}' | r
RUN echo 'if (!require("DatabaseConnector")){remotes::install_github("OHDSI/DatabaseConnector")}' | r
RUN echo 'if (!require("DT")){install.packages("DT")}' | r
#COPY qc-lib.r /app/
#RUN r /app/qc-lib.r

RUN mkdir -p /app/
WORKDIR /app

# Crude supervisor loop: restart the server whenever it exits.
ENTRYPOINT while true; do python3 server.py; sleep 3; done
258 changes: 258 additions & 0 deletions Resources/dqd-omop-5.4/qc.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
###############################################################################
## Run Achilles ##
##
## Command-line interface:
##   Rscript qc.r <sqliteDb> <outputFolder> <outputFile>
##
## NOTE(review): installing packages at run time slows every invocation;
## prefer baking these into the Docker image.

if (!require("DbDiagnostics")) {
  remotes::install_github("OHDSI/DbDiagnostics")
}

if (!require("DatabaseConnector")) {
  remotes::install_github("OHDSI/DatabaseConnector")
}

# Get started: https://ohdsi.github.io/DbDiagnostics/articles/RunAndUploadDbProfile.html
# Call the library
library(DbDiagnostics)

# Turn off the connection pane to speed up run time
options(connectionObserver = NULL)

if (!require("RSQLite")) {
  install.packages("RSQLite")
}

# if (!require('shiny')) {
# install.packages("shiny")
# }

if (!require("DT")) {
  install.packages("DT")
}

args <- commandArgs(trailingOnly = TRUE)

# Fail fast with a usage message instead of propagating NA paths downstream.
if (length(args) < 3) {
  stop("Usage: Rscript qc.r <sqliteDb> <outputFolder> <outputFile>", call. = FALSE)
}

sqliteDb <- args[1]
outputFolder <- args[2]
outputFile <- args[3]

if (!file.exists(sqliteDb)) {
  stop("Error: SQLite database file does not exist: ", sqliteDb, call. = FALSE)
}

if (!dir.exists(outputFolder)) {
  stop("Error: output folder does not exist: ", outputFolder, call. = FALSE)
}

# Connection details for the exported SQLite CDM database.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "sqlite",
  server = sqliteDb
)

## One-time schema setup when running against a postgres database (reference only):
#\c ivan;
#CREATE SCHEMA reference;
#CREATE SCHEMA qc_result;

# NOTE(review): the original script defined this whole configuration block
# twice and ran Achilles::achilles() twice; the duplicate also re-created
# connectionDetails with a trailing comma, which is an empty-argument error
# in R. Collapsed to a single configuration + a single run.

# The schema where your CDM-structured data are housed
cdmDatabaseSchema <- "main"

# The schema where your achilles results are or will be housed
resultsDatabaseSchema <- "qc_result"

# The schema where your vocabulary tables are housed, typically the same as the cdmDatabaseSchema
vocabDatabaseSchema <- cdmDatabaseSchema

# A unique, identifiable name for your database
cdmSourceName <- "export"

# The version of the OMOP CDM you are currently on, v5.3 and v5.4 are supported.
cdmVersion <- "5.4"

# Whether the function should append existing Achilles tables or create new ones
appendAchilles <- FALSE

# The schema where any missing achilles analyses should be written. Only set if appendAchilles = FALSE
writeTo <- "qc_result"

# Whether to round to the 10s or 100s place. Valid inputs are 10 or 100, default is 10.
roundTo <- 10

# Vector of concepts to exclude from the output. Note: No patient-level data is pulled as part of the package or included as part of the output
excludedConcepts <- c()

# Whether the DQD should be run as part of the profile exercise
addDQD <- FALSE


################################################################################
## Run achilles (results are written into the "main" schema of the SQLite db)
result <- Achilles::achilles(
  connectionDetails,
  cdmDatabaseSchema = "main",
  resultsDatabaseSchema = "main",
  # sourceName = "export",
  cdmVersion = cdmVersion,
  createTable = TRUE
)

##
################################################################################


## Check: list the tables now present, then release the connection.

connection <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::getTableNames(connection, databaseSchema = "main")
DatabaseConnector::disconnect(connection)

cdmDatabaseSchema <- "main"      # the fully qualified database schema name of the CDM
resultsDatabaseSchema <- "main"  # the fully qualified database schema name of the results schema

cdmSourceName <- "main"  # a human readable name for your CDM source
cdmVersion <- "5.4"      # the CDM version you are targeting. Currently supports 5.2, 5.3, and 5.4

# determine how many threads (concurrent SQL sessions) to use -----------------
numThreads <- 1  # on Redshift, 3 seems to work well

# specify if you want to execute the queries or inspect them ------------------
sqlOnly <- FALSE  # set to TRUE if you just want to get the SQL scripts and not actually run the queries
sqlOnlyIncrementalInsert <- FALSE  # set to TRUE if you want the generated SQL queries to calculate DQD results
sqlOnlyUnionCount <- 1  # in sqlOnlyIncrementalInsert mode, the number of check sqls to union in a single query

# NOTES specific to sqlOnly <- TRUE option ------------------------------------
# 1. You do not need a live database connection. Instead, connectionDetails only needs these parameters
#    connectionDetails <- DatabaseConnector::createConnectionDetails(
#      dbms = "", # specify your dbms
#    )
# 2. Since these are fully functional queries, this can help with debugging.
# 3. In the results output by the sqlOnlyIncrementalInsert queries, placeholders are populated for execution.
# 4. In order to use the generated SQL to insert metadata and check results into output table, you must
#    create the results table beforehand.

# logging type -----------------------------------------------------------------
verboseMode <- TRUE  # set to FALSE if you don't want the logs to be printed to the console

# write results to table? ------------------------------------------------------
writeToTable <- TRUE  # set to FALSE if you want to skip writing to a SQL table in the results schema

# specify the name of the results table (used when writeToTable = TRUE and when sqlOnlyIncrementalInsert = TRUE)
writeTableName <- "dqdashboard_results"

# write results to a csv file? -------------------------------------------------
writeToCsv <- FALSE  # set to FALSE if you want to skip writing to csv file
csvFile <- ""  # only needed if writeToCsv is set to TRUE

# if writing to table and using Redshift, bulk loading can be initialized ------
# Sys.setenv("AWS_ACCESS_KEY_ID" = "",
#            "AWS_SECRET_ACCESS_KEY" = "",
#            "AWS_DEFAULT_REGION" = "",
#            "AWS_BUCKET_NAME" = "",
#            "AWS_OBJECT_KEY" = "",
#            "AWS_SSE_TYPE" = "AES256",
#            "USE_MPP_BULK_LOAD" = TRUE)

# which DQ check levels to run -------------------------------------------------
checkLevels <- c("TABLE", "FIELD", "CONCEPT")

# which DQ checks to run? (removed a stray leading "2" literal) ----------------
checkNames <- c()  # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv

# want to EXCLUDE a pre-specified list of checks? run the following code:
#
# checksToExclude <- c() # Names of check types to exclude from your DQD run
# allChecks <- DataQualityDashboard::listDqChecks()
# checkNames <- allChecks$checkDescriptions %>%
#   subset(!(checkName %in% checksToExclude)) %>%
#   select(checkName)

# which CDM tables to exclude? -------------------------------------------------
# NOTE: fixed typo "SPECIMEM" -> "SPECIMEN" so the SPECIMEN table is actually excluded.
tablesToExclude <- c(
  "CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP",
  "CONCEPT_CLASS", "DEATH", "LOCATION", "PAYER_PLAN_PERIOD", "NOTE",
  "NOTE_NLP", "SPECIMEN", "VISIT_DETAIL", "COST"
)

# run the job ------------------------------------------------------------------
DataQualityDashboard::executeDqChecks(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = cdmDatabaseSchema,
  resultsDatabaseSchema = resultsDatabaseSchema,
  cdmSourceName = cdmSourceName,
  cdmVersion = cdmVersion,
  numThreads = numThreads,
  sqlOnly = sqlOnly,
  sqlOnlyUnionCount = sqlOnlyUnionCount,
  sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert,
  outputFolder = outputFolder,
  outputFile = outputFile,
  verboseMode = verboseMode,
  writeToTable = writeToTable,
  writeToCsv = writeToCsv,
  csvFile = csvFile,
  checkLevels = checkLevels,
  tablesToExclude = tablesToExclude,
  checkNames = checkNames
)

# inspect logs -----------------------------------------------------------------
# ParallelLogger::launchLogViewer(
#   logFileName = file.path(outputFolder,
#     sprintf("log_DqDashboard_%s.txt", cdmSourceName)))

# (OPTIONAL) if you want to write the JSON file to the results table separately
# (original used `"/"+outputFile`, which is not valid R string concatenation):
# jsonFilePath <- file.path(outputFolder, outputFile)

64 changes: 64 additions & 0 deletions Resources/dqd-omop-5.4/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import html
import http.server
import json
import logging
import os
import re
import shlex
import socketserver
import subprocess
from typing import Dict
from urllib.parse import urlparse, parse_qs

# TCP port this HTTP service binds to (see the TCPServer at the bottom of the file).
PORT = 8080

# 10 == logging.DEBUG: log everything, including the command lines and R output below.
logging.getLogger().setLevel(10)

class SimpleHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """HTTP front end that runs the DQD R script (/app/qc.r) against an
    uploaded SQLite file under /data/ and reports the result/log file names.
    """

    # Single safe path component: letters, digits, dot, underscore, hyphen.
    # Blocks path traversal ("../") and shell metacharacters before user
    # input reaches the filesystem or the bash command line below.
    _SAFE_TOKEN = re.compile(r'^[A-Za-z0-9._-]+$')

    @classmethod
    def _is_safe_token(cls, value: str) -> bool:
        """Return True when value is one safe path component (no '/', no '..')."""
        return bool(cls._SAFE_TOKEN.match(value)) and value not in ('.', '..')

    def do_GET(self):
        """Diagnostic endpoint: echo the parsed request back as HTML."""
        parsed_path = urlparse(self.path)
        query_params = parse_qs(parsed_path.query)

        # html.escape prevents reflected XSS via attacker-controlled URLs.
        response = (
            "<html><body>"
            f"<h1>Requested URI: {html.escape(parsed_path.path)}</h1>"
            "<h2>HTTP Method: GET</h2>"
            f"<h3>Query Parameters: {html.escape(str(query_params))}</h3>"
            "</body></html>"
        )
        self.reply(200, response)

    def do_POST(self):
        """Run qc.r on /data/<file>, writing results-<id>.json and qc-<id>.log.

        Expects query parameters `file` (SQLite db name under /data/) and
        `id` (job identifier used to name the output files).
        """
        parsed_path = urlparse(self.path)
        if parsed_path.path != "/dqd-omop-5.4":
            return self.reply(404, "Unknown route specified {}".format(parsed_path.path))
        query_params = parse_qs(parsed_path.query)

        # The original raised an unhandled KeyError (-> broken connection)
        # when either parameter was missing; answer 400 instead.
        try:
            file_param = str(query_params['file'][0])
            job_id = str(query_params['id'][0])
        except (KeyError, IndexError):
            return self.reply(400, "Missing required query parameters 'file' and 'id'")

        # Reject anything that could escape /data/ or inject into the shell.
        if not self._is_safe_token(file_param) or not self._is_safe_token(job_id):
            return self.reply(400, "Invalid characters in 'file' or 'id'")

        filename = '/data/' + file_param
        if not os.path.isfile(filename):
            return self.reply(404, "File {} not found".format(filename))
        result_filename = "results-" + job_id + ".json"
        log_filename = "qc-" + job_id + ".log"

        # The values interpolated here are restricted to _SAFE_TOKEN above,
        # so they cannot be interpreted by bash.
        cmd = (
            "bash -c 'export LD_LIBRARY_PATH=/usr/lib/jvm/java-21-openjdk-amd64/lib/server/; "
            "cd /data/; Rscript /app/qc.r " + filename + " /data/ " + result_filename
            + " > " + log_filename + " 2>&1'"
        )
        logging.info(cmd)
        args = shlex.split(cmd)
        p = subprocess.run(args, capture_output=True)
        logs = str(p.stdout.decode('utf-8'))
        logging.info(logs)

        if p.returncode > 0:
            return self.reply(500, "Error running DQD:\n{}".format(logs))

        response: Dict[str, str] = {"result": result_filename, "log": log_filename}

        self.reply(200, json.dumps(response))

    def reply(self, code: int, response: str):
        """Send `response` as a text/html body with HTTP status `code`."""
        self.send_response(code)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(response.encode())

if __name__ == "__main__":
    # Bind on all interfaces and block in the accept loop until killed;
    # the context manager closes the listening socket on exit.
    server = socketserver.TCPServer(("", PORT), SimpleHTTPRequestHandler)
    with server as httpd:
        print(f"Serving on port {PORT}")
        # httpd.timeout = 1800  # timeout to generate results.json
        httpd.serve_forever()
8 changes: 8 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,11 @@ services:
UNISON_DSN: "postgresql+psycopg2://postgres:postgres@db/postgres"
volumes:
- ./csv:/loader/csv

dqd-omop-5.4:
container_name: runner-from-csv-dqd-omop-5.4
image: entsupml/unison-runner-dqd-omop-5.4
ports: [8080]
volumes:
- ./var:/data:delegated
- ./Resources/dqd-omop-5.4/:/app