| Title: | Assessing the Integrity and Trustworthiness of Clinical Trials Data |
|---|---|
| Description: | The integrity package implements the IPD Integrity Tool, a structured and transparent framework for evaluating the integrity of individual participant data (IPD) from randomised trials (see Hunter et al. (2024) <doi:10.1002/jrsm.1738> and <doi:10.32614/RJ-2017-008>). It supports users to identify potential issues, such as unusual data patterns, implausible values, lack of expected correlations, date violations, and inconsistencies. The package provides reproducible workflows for screening, documenting and summarising integrity concerns, and may be applied by evidence synthesists, editors, and others to determine whether a randomised trial may be considered sufficiently trustworthy to contribute to the evidence base that informs policy and practice. |
| Authors: | Sol Libesman [aut, cre], Kylie Hunter [aut], David Nguyen [aut], Dario Strbenac [aut], Anne Lene Seidler [aut], Jie Kang [aut] |
| Maintainer: | Sol Libesman <[email protected]> |
| License: | GPL-3 |
| Version: | 1.0.1 |
| Built: | 2026-05-27 09:06:52 UTC |
| Source: | https://github.com/cran/integrity |
Internal function documentation for developers. Levene's test for differential variability.
.differential_variability(dataset_subset, intervention, alpha)
dataset_subset |
A |
intervention |
Column name of intervention indicator. |
alpha |
p-value significance threshold. |
One-row data.frame with a Pass or Fail indicator.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
dataset <- integrity:::.prepare_data(dataset, info)
numeric_columns <- info$baseline$numeric
dataset_subset <- dataset[, c(numeric_columns, info$intervention)]
integrity:::.differential_variability(dataset_subset, info$intervention, 0.05)
Internal function documentation for developers. Dates are converted into days of the week and tested for association with intervention status using chisq.test.
.imbalance_day_intervention(dataset, intervention, intervention_date, unexpected, alpha)
dataset |
A |
intervention |
Column name of column storing intervention status indicator. |
intervention_date |
Column name of column storing intervention date. |
unexpected |
List of elements specifying implausible values. Names of list are column names. One must be |
alpha |
p-value significance threshold. |
A list of length two. check_table: One-row data.frame with a Pass or Fail indicator. images: Bar chart of days of week. Bars are coloured by intervention status.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
integrity:::.imbalance_day_intervention(dataset, info$intervention, info$enrollment$randomisation, info$unexpected, 0.05)
Internal function documentation for developers. Each column is checked for violations.
.implausible_values(dataset, participantID, unexpected, enrollment)
dataset |
A |
participantID |
Column name of column storing participant IDs. |
unexpected |
List of elements specifying implausible values. Names of list are column names |
enrollment |
Column name of column storing enrollment dates. |
A data.frame with one row for each violation or one row with Pass if no rows violated the check.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
integrity:::.implausible_values(dataset, info$participantID, info$unexpected, info$enrollment)
Internal function documentation for developers. Firstly, the function checks all expected variables are present as column names. Then, it converts any columns defined as categorical to factors. Finally, it removes any columns that have all missing values.
.prepare_data(dataset, info)
dataset |
A |
info |
A named list of column names corresponding to different aspects of the clinical trial. See the vignette for detailed requirements. |
If no expected columns are missing, a data.frame from which any columns containing only missing values have been removed.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
integrity:::.prepare_data(dataset, info)
Internal function documentation for developers. Essentially a wrapper around get_dupes of janitor.
.repeating_baseline(dataset_subset, type = c("across", "within", "across_rare"))
dataset_subset |
A |
type |
If |
A data.frame with one row for each repetition or just one row reporting Pass status for the check.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
dataset_subset <- dataset[, unlist(info$baseline)]
integrity:::.repeating_baseline(dataset_subset)
Internal function documentation for developers. Creates a distribution plot of terminal digits
.terminal_digits(dataset_subset)
dataset_subset |
A |
A ggplot2 plot.
library(readxl)
examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
dataset <- read_excel(examplePath)
library(yaml)
infoPath <- system.file("extdata", "variables.yaml", package = "integrity")
info <- read_yaml(infoPath)
numeric_columns <- info$baseline$numeric
dataset_subset <- dataset[, unlist(info$baseline)]
integrity:::.terminal_digits(dataset_subset)
Reads a metadata workbook template and converts it into the named list structure
required by run_checks().
read_metadata_excel(path, sheet = 1)
path |
Path to an Excel workbook containing metadata rows. |
sheet |
Sheet name or position to read. Default: 1. |
A named list suitable for the info argument of run_checks().
if(interactive()) {
  example_path <- system.file("extdata", "variables_template.xlsx", package = "integrity")
  dataset_info <- read_metadata_excel(example_path)
  names(dataset_info)
}
Sources an R script template and returns the metadata list required by
run_checks().
read_metadata_r(path, object_name = "dataset_info")
path |
Path to an R script containing a metadata object. |
object_name |
Name of the object to return from the R script. Default: "dataset_info". |
A named list suitable for the info argument of run_checks().
if(interactive()) {
  example_path <- system.file("extdata", "variables_template.R", package = "integrity")
  dataset_info <- read_metadata_r(example_path)
  names(dataset_info)
}
Depending on the characteristics of the variables, some tests may be skipped if the data type required for the test is not present.
run_checks(dataset, info, alpha = 0.05)
dataset |
A |
info |
A named list of column names corresponding to different aspects of the clinical trial. See the vignette for detailed requirements. |
alpha |
Default: 0.05. For checks which use a statistical test, the p-value threshold at which to report a failure. |
A list with the element named "check_table" having the table of passes and fails,
the element named "detail_tables" storing additional per-variable results for selected checks,
the element named "images" storing ggplot2 plots and the element named "summary_table" having an overview table of the
baseline and outcome variables split by intervention.
if(interactive()) {
  library(readxl)
  examplePath <- system.file("extdata", "dataset.xlsx", package = "integrity")
  dataset <- read_excel(examplePath)
  library(yaml)
  example_path <- system.file("extdata", "variables.yaml", package = "integrity")
  dataset_info <- read_yaml(example_path)
  result <- run_checks(dataset, dataset_info)
  names(result)
}