Title: | Workflow for Open Reproducible Code in Science |
---|---|
Description: | Create reproducible and transparent research projects in 'R'. This package is based on the Workflow for Open Reproducible Code in Science (WORCS), a step-by-step procedure based on best practices for Open Science. It includes an 'RStudio' project template, several convenience functions, and all dependencies required to make your project reproducible and transparent. WORCS is explained in the tutorial paper by Van Lissa, Brandmaier, Brinkman, Lamprecht, Struiksma, & Vreede (2021). <doi:10.3233/DS-210031>. |
Authors: | Caspar J. Van Lissa [aut, cre] , Aaron Peikert [aut] , Andreas M. Brandmaier [aut] |
Maintainer: | Caspar J. Van Lissa <[email protected]> |
License: | GPL (>= 3) |
Version: | 0.1.16 |
Built: | 2024-11-06 11:14:59 UTC |
Source: | https://github.com/cjvanlissa/worcs |
Add a specific endpoint to the WORCS project file. Endpoints are files that are expected to be exactly reproducible (e.g., a manuscript, figure, table, et cetera). Reproducibility is checked by ensuring the endpoint's checksum is unchanged.
add_endpoint(filename = NULL, worcs_directory = ".", verbose = TRUE, ...)
add_endpoint(filename = NULL, worcs_directory = ".", verbose = TRUE, ...)
filename |
Character, indicating the file to be tracked as endpoint. Default: NULL. |
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
... |
Additional arguments. |
No return value. This function is called for its side effects.
snapshot_endpoints
check_endpoints
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Adds an Rmarkdown manuscript to a 'worcs' project.
add_manuscript( worcs_directory = ".", manuscript = "APA6", remote_repo = NULL, verbose = TRUE, ... )
add_manuscript( worcs_directory = ".", manuscript = "APA6", remote_repo = NULL, verbose = TRUE, ... )
worcs_directory |
Character, indicating the directory in which to create the manuscript files. Default: '.', which points to the current working directory. |
manuscript |
Character, indicating what template to use for the
'R Markdown' manuscript. Default: 'APA6'. Available choices include:
|
remote_repo |
Character, 'https' link to the remote repository for
this project. This link should have the form |
verbose |
Logical. Whether or not to print messages to the console during project creation. Default: TRUE |
... |
Additional arguments passed to and from functions. |
Available choices include the following manuscript templates:
'APA6'
An APA6 style template from the papaja
package
'github_document'
A github_document
from the rmarkdown
package
'acm_article'
acm style template from the rticles
package
'acs_article'
acs style template from the rticles
package
'aea_article'
aea style template from the rticles
package
'agu_article'
agu style template from the rticles
package
'ajs_article'
ajs style template from the rticles
package
'amq_article'
amq style template from the rticles
package
'ams_article'
ams style template from the rticles
package
'arxiv_article'
arxiv style template from the rticles
package
'asa_article'
asa style template from the rticles
package
'bioinformatics_article'
bioinformatics style template from the rticles
package
'biometrics_article'
biometrics style template from the rticles
package
'copernicus_article'
copernicus style template from the rticles
package
'ctex_article'
ctex style template from the rticles
package
'elsevier_article'
elsevier style template from the rticles
package
'frontiers_article'
frontiers style template from the rticles
package
'glossa_article'
glossa style template from the rticles
package
'ieee_article'
ieee style template from the rticles
package
'ims_article'
ims style template from the rticles
package
'informs_article'
informs style template from the rticles
package
'iop_article'
iop style template from the rticles
package
'isba_article'
isba style template from the rticles
package
'jasa_article'
jasa style template from the rticles
package
'jedm_article'
jedm style template from the rticles
package
'joss_article'
joss style template from the rticles
package
'jss_article'
jss style template from the rticles
package
'lipics_article'
lipics style template from the rticles
package
'mdpi_article'
mdpi style template from the rticles
package
'mnras_article'
mnras style template from the rticles
package
'oup_article'
oup style template from the rticles
package
'peerj_article'
peerj style template from the rticles
package
'pihph_article'
pihph style template from the rticles
package
'plos_article'
plos style template from the rticles
package
'pnas_article'
pnas style template from the rticles
package
'rjournal_article'
rjournal style template from the rticles
package
'rsos_article'
rsos style template from the rticles
package
'rss_article'
rss style template from the rticles
package
'sage_article'
sage style template from the rticles
package
'sim_article'
sim style template from the rticles
package
'springer_article'
springer style template from the rticles
package
'tf_article'
tf style template from the rticles
package
'trb_article'
trb style template from the rticles
package
'wellcomeor_article'
wellcomeor style template from the rticles
package
No return value. This function is called for its side effects.
the_test <- "worcs_manuscript" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) file.create(file.path(tempdir(), the_test, ".worcs")) add_manuscript(file.path(tempdir(), the_test), manuscript = "None") setwd(old_wd) unlink(file.path(tempdir(), the_test))
the_test <- "worcs_manuscript" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) file.create(file.path(tempdir(), the_test, ".worcs")) add_manuscript(file.path(tempdir(), the_test), manuscript = "None") setwd(old_wd) unlink(file.path(tempdir(), the_test))
Adds an Rmarkdown preregistration template to a 'worcs' project.
add_preregistration( worcs_directory = ".", preregistration = "cos_prereg", verbose = TRUE, ... )
add_preregistration( worcs_directory = ".", preregistration = "cos_prereg", verbose = TRUE, ... )
worcs_directory |
Character, indicating the directory in which to create the preregistration files. Default: '.', which points to the current working directory. |
preregistration |
Character, indicating what template to use for the
preregistration. Default: 'cos_prereg'. |
verbose |
Logical. Whether or not to print messages to the console during project creation. Default: TRUE |
... |
Additional arguments passed to and from functions. |
Available choices include the templates from the
prereg
package, and several unique templates
included with worcs
:
'PSS'
Preregistration and Sharing Software (Krypotos, Klugkist, Mertens, & Engelhard, 2019)
'Secondary'
Preregistration for secondary analyses (Mertens & Krypotos, 2019)
'aspredicted_prereg'
aspredicted template from the prereg
package
'brandt_prereg'
brandt template from the prereg
package
'cos_prereg'
cos template from the prereg
package
'fmri_prereg'
fmri template from the prereg
package
'prp_quant_prereg'
prp_quant template from the prereg
package
'psyquant_prereg'
psyquant template from the prereg
package
'rr_prereg'
rr template from the prereg
package
'vantveer_prereg'
vantveer template from the prereg
package
No return value. This function is called for its side effects.
the_test <- "worcs_prereg" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) file.create(file.path(tempdir(), the_test, ".worcs")) add_preregistration(file.path(tempdir(), the_test), preregistration = "cos_prereg") setwd(old_wd) unlink(file.path(tempdir(), the_test))
the_test <- "worcs_prereg" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) file.create(file.path(tempdir(), the_test, ".worcs")) add_preregistration(file.path(tempdir(), the_test), preregistration = "cos_prereg") setwd(old_wd) unlink(file.path(tempdir(), the_test))
Add a recipe to a WORCS project file to generate its endpoints.
add_recipe( worcs_directory = ".", recipe = "rmarkdown::render('manuscript/manuscript.Rmd')", terminal = FALSE, verbose = TRUE, ... )
add_recipe( worcs_directory = ".", recipe = "rmarkdown::render('manuscript/manuscript.Rmd')", terminal = FALSE, verbose = TRUE, ... )
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
recipe |
Character string, indicating the function call to evaluate in order to reproduce the endpoints of the WORCS project. |
terminal |
Logical, indicating whether or not to evaluate the |
verbose |
Logical. Whether or not to print status messages to the
console. Default: TRUE |
... |
Additional arguments. |
No return value. This function is called for its side effects.
add_endpoint
snapshot_endpoints
check_endpoints
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_recipe") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_recipe() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_recipe") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_recipe() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
This function adds a user-specified synthetic data resource for public use to a WORCS project with closed data.
add_synthetic( data, synthetic_name = paste0("synthetic_", original_name), original_name, worcs_directory = ".", verbose = TRUE, ... )
add_synthetic( data, synthetic_name = paste0("synthetic_", original_name), original_name, worcs_directory = ".", verbose = TRUE, ... )
data |
A |
synthetic_name |
Character, naming the file synthetic data should be
written to. By
default, prepends |
original_name |
Character, naming an existing data resource in the WORCS
project with which to associate the synthetic |
worcs_directory |
Character, indicating the WORCS project directory to
which to save data. The default value "." points to the current directory. |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
... |
Additional arguments passed to and from functions. |
Returns NULL
invisibly. This
function is called for its side effects.
open_data closed_data save_data
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_synthetic") dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") # Prepare data df <- iris[1:3, ] # Run closed_data without synthetic closed_data(df, codebook = NULL, synthetic = FALSE) # Manually add synthetic add_synthetic(df, original_name = "df.csv") # Remove original from file and environment file.remove("df.csv") rm(df) # See that load_data() now loads the synthetic file load_data() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "add_synthetic") dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") # Prepare data df <- iris[1:3, ] # Run closed_data without synthetic closed_data(df, codebook = NULL, synthetic = FALSE) # Manually add synthetic add_synthetic(df, original_name = "df.csv") # Remove original from file and environment file.remove("df.csv") rm(df) # See that load_data() now loads the synthetic file load_data() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Add a computational pipeline to a worcs
project using the
targets
and tarchetypes
packages (which must be installed). See those
packages for extensive documentation.
add_targets(worcs_directory = ".", verbose = TRUE, ...)
add_targets(worcs_directory = ".", verbose = TRUE, ...)
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
verbose |
Logical. Whether or not to print status messages to the
console. Default: TRUE |
... |
Arguments passed to |
No return value. This function is called for its side effects.
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "targets") dir.create(test_dir) setwd(test_dir) file.create(".worcs") add_targets() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "targets") dir.create(test_dir) setwd(test_dir) file.create(".worcs") add_targets() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Check that the checksums of all endpoints in a WORCS project match their snapshots.
check_endpoints(worcs_directory = ".", verbose = TRUE, ...)
check_endpoints(worcs_directory = ".", verbose = TRUE, ...)
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
... |
Additional arguments. |
Returns a logical value (TRUE/FALSE) invisibly.
add_endpoint
snapshot_endpoints
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "check_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") check_endpoints() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "check_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") check_endpoints() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Evaluates whether a project meets the criteria of the WORCS
checklist (see worcs_checklist
).
check_worcs(path = ".", verbose = TRUE)
check_worcs(path = ".", verbose = TRUE)
path |
Character. Path to a WORCS project folder (a project with a
|
verbose |
Logical. Whether or not to show status messages while
evaluating the checklist. Default: TRUE |
A data.frame
with a description of the criteria, and a column
with evaluations ($pass
). For criteria that must be evaluated
manually, $pass
will be FALSE
.
example_dir <- file.path(tempdir(), "badge") dir.create(example_dir) write("a", file.path(example_dir, ".worcs")) check_worcs(path = example_dir)
example_dir <- file.path(tempdir(), "badge") dir.create(example_dir) write("a", file.path(example_dir, ".worcs")) check_worcs(path = example_dir)
This function checks that all worcs dependencies are correctly installed, and suggests how to remedy any missing dependencies.
check_worcs_installation(what = "all") check_dependencies(package = "worcs") check_git() check_github(pat = TRUE, ssh = FALSE) check_ssh() check_tinytext() check_rmarkdown() check_renv()
check_worcs_installation(what = "all") check_dependencies(package = "worcs") check_git() check_github(pat = TRUE, ssh = FALSE) check_ssh() check_tinytext() check_rmarkdown() check_renv()
what |
Character vector indicating which dependencies to check. Default:
"all" |
package |
Atomic character vector, indicating for which package to check the dependencies. |
pat |
Logical, whether to run tests for the existence and functioning of a GitHub Personal Access Token (PAT). This is the preferred method of authentication, so defaults to TRUE. |
ssh |
Logical, whether to run tests for the existence and functioning of an SSH key. This method of authentication is not recommended, so defaults to FALSE. |
Logical, indicating whether all checks passed or not.
check_worcs_installation("none")
check_worcs_installation("none")
This is a wrapper for render
. First, this function
parses the citations in the document, converting citations
marked with double at sign, e.g.: @@reference2020
, into normal
citations, e.g.: @reference2020
. Then, it renders the file.
cite_all(...)
cite_all(...)
... |
All arguments are passed to |
Returns NULL
invisibly. This
function is called for its side effect of rendering an
'R Markdown' file.
# NOTE: Do not use this function interactively, as in the example below. # Only specify it as custom knit function in an 'R Markdown' file, like so: # knit: worcs::cite_all if (rmarkdown::pandoc_available("2.0")){ file_name <- file.path(tempdir(), "citeall.Rmd") loc <- rmarkdown::draft(file_name, template = "github_document", package = "rmarkdown", create_dir = FALSE, edit = FALSE) write(c("", "Optional reference: @reference2020"), file = file_name, append = TRUE) cite_all(file_name) }
# NOTE: Do not use this function interactively, as in the example below. # Only specify it as custom knit function in an 'R Markdown' file, like so: # knit: worcs::cite_all if (rmarkdown::pandoc_available("2.0")){ file_name <- file.path(tempdir(), "citeall.Rmd") loc <- rmarkdown::draft(file_name, template = "github_document", package = "rmarkdown", create_dir = FALSE, edit = FALSE) write(c("", "Optional reference: @reference2020"), file = file_name, append = TRUE) cite_all(file_name) }
This is a wrapper for render
. First, this function
parses the citations in the document, removing citations
marked with double at sign, e.g.: @@reference2020
. Then, it renders
the file.
cite_essential(...)
cite_essential(...)
... |
All arguments are passed to |
Returns NULL
invisibly. This
function is called for its side effect of rendering an
'R Markdown' file.
# NOTE: Do not use this function interactively, as in the example below. # Only specify it as custom knit function in an R Markdown file, like so: # knit: worcs::cite_essential if (rmarkdown::pandoc_available("2.0")){ file_name <- tempfile("citeessential", fileext = ".Rmd") rmarkdown::draft(file_name, template = "github_document", package = "rmarkdown", create_dir = FALSE, edit = FALSE) write(c("", "Optional reference: @reference2020"), file = file_name, append = TRUE) cite_essential(file_name) }
# NOTE: Do not use this function interactively, as in the example below. # Only specify it as custom knit function in an R Markdown file, like so: # knit: worcs::cite_essential if (rmarkdown::pandoc_available("2.0")){ file_name <- tempfile("citeessential", fileext = ".Rmd") rmarkdown::draft(file_name, template = "github_document", package = "rmarkdown", create_dir = FALSE, edit = FALSE) write(c("", "Optional reference: @reference2020"), file = file_name, append = TRUE) cite_essential(file_name) }
This function saves a data.frame as a .csv
file (using
write.csv
), stores a checksum in '.worcs',
appends the .gitignore
file to exclude filename
, and saves a
synthetic copy of data
for public use. To generate these synthetic
data, the function synthetic
is used.
closed_data( data, filename = paste0(deparse(substitute(data)), ".csv"), codebook = paste0("codebook_", deparse(substitute(data)), ".Rmd"), value_labels = paste0("value_labels_", deparse(substitute(data)), ".yml"), worcs_directory = ".", synthetic = TRUE, save_expression = write.csv(x = data, file = filename, row.names = FALSE), load_expression = read.csv(file = filename, stringsAsFactors = TRUE), ... )
closed_data( data, filename = paste0(deparse(substitute(data)), ".csv"), codebook = paste0("codebook_", deparse(substitute(data)), ".Rmd"), value_labels = paste0("value_labels_", deparse(substitute(data)), ".yml"), worcs_directory = ".", synthetic = TRUE, save_expression = write.csv(x = data, file = filename, row.names = FALSE), load_expression = read.csv(file = filename, stringsAsFactors = TRUE), ... )
data |
A data.frame to save. |
filename |
Character, naming the file data should be written to. By
default, constructs a filename from the name of the object passed to
|
codebook |
Character, naming the file the codebook should be written to.
An 'R Markdown' codebook will be created and rendered to
|
value_labels |
Character, naming the file the value labels of factors
and ordinal variables should be written to.
By default, constructs a filename from the name of the object passed to
|
worcs_directory |
Character, indicating the WORCS project directory to
which to save data. The default value "." points to the current directory. |
synthetic |
Logical, indicating whether or not to create a synthetic
dataset using the |
save_expression |
An R-expression used to save the |
load_expression |
An R-expression used to load the |
... |
Additional arguments passed to and from functions. |
Returns NULL
invisibly. This
function is called for its side effects.
open_data closed_data save_data
old_wd <- getwd() test_dir <- file.path(tempdir(), "data") dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:3, ] closed_data(df, codebook = NULL) setwd(old_wd) unlink(test_dir, recursive = TRUE)
old_wd <- getwd() test_dir <- file.path(tempdir(), "data") dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:3, ] closed_data(df, codebook = NULL) setwd(old_wd) unlink(test_dir, recursive = TRUE)
For each column of x
, this function checks whether value
labels exist in value_labels
. If so, integer values are replaced with these
value labels.
data_label( x, variables = names(x), value_labels = read_yaml(paste0("value_labels_", substitute(x), ".yml")) )
data_label( x, variables = names(x), value_labels = read_yaml(paste0("value_labels_", substitute(x), ".yml")) )
x |
A |
variables |
Column names of |
value_labels |
A list with value labels, typically read from metadata
generated by |
A data.frame
.
## Not run: if(interactive()){ labs <- list(x = list(class = "factor", `1` = "a", `2` = "b")) df <- data.frame(x = 1:2) data_label(df, value_labels = labs) } ## End(Not run)
## Not run: if(interactive()){ labs <- list(x = list(class = "factor", `1` = "a", `2` = "b")) df <- data.frame(x = 1:2) data_label(df, value_labels = labs) } ## End(Not run)
Coerces factor
and ordered
variables to class integer
.
data_unlabel(x, variables = names(x)[sapply(x, inherits, what = "factor")])
data_unlabel(x, variables = names(x)[sapply(x, inherits, what = "factor")])
x |
A |
variables |
Column names of |
A data.frame
.
## Not run: if(interactive()){ df <- data.frame(x = factor(c("a", "b"))) data_unlabel(df) } ## End(Not run)
## Not run: if(interactive()){ df <- data.frame(x = factor(c("a", "b"))) data_unlabel(df) } ## End(Not run)
Provide descriptive statistics for a dataset.
descriptives(x, ...)
descriptives(x, ...)
x |
An object for which a method exists. |
... |
Additional arguments. |
A data.frame
with descriptive statistics for x
.
descriptives(iris)
descriptives(iris)
Export project to .zip file
export_project(zipfile = NULL, worcs_directory = ".", open_data = TRUE)
export_project(zipfile = NULL, worcs_directory = ".", open_data = TRUE)
zipfile |
Character. Path to a |
worcs_directory |
Character. Path to the WORCS project directory to
export. Defaults to |
open_data |
Logical. Whether or not to include the original data,
'data.csv', if this file exists. If |
Logical, indicating the success of the operation. This function is
called for its side effect of creating a .zip
file.
export_project(worcs_directory = tempdir())
export_project(worcs_directory = tempdir())
Arguments passed through ...
are added to the .gitignore
file. Elements already present in the file are modified.
When ignore = TRUE
, the arguments are added to the .gitignore file,
which will cause 'Git' to not track them.
When ignore = FALSE
, the arguments are prepended with !
.
This works as a "double negation", and will cause 'Git' to track the files.
git_ignore(..., ignore = TRUE, repo = ".")
git_ignore(..., ignore = TRUE, repo = ".")
... |
Any number of character arguments, representing files to be added to the .gitignore file. |
ignore |
Logical. Whether or not 'Git' should ignore these files. |
repo |
a path to an existing repository, or a git_repository object as returned by git_open, git_init or git_clone. |
No return value. This function is called for its side effects.
dir.create(".git") git_ignore("ignorethis.file") unlink(".git", recursive = TRUE) file.remove(".gitignore")
dir.create(".git") git_ignore("ignorethis.file") unlink(".git", recursive = TRUE) file.remove(".gitignore")
This function is a wrapper for
git_add
, git_commit
,
and
git_push
. It adds all locally changed files to the
staging area of the local 'Git' repository, then commits these changes
(with an optional message), and then pushes them to a remote
repository. This is used for making a "cloud backup" of local changes.
Do not use this function when working with privacy sensitive data,
or any other file that should not be pushed to a remote repository.
The git_add
argument
force
is disabled by default,
to avoid accidentally committing and pushing a file that is listed in
.gitignore
.
git_update( message = paste0("update ", Sys.time()), files = ".", repo = ".", author, committer, remote, refspec, password, ssh_key, mirror, force, verbose = TRUE )
git_update( message = paste0("update ", Sys.time()), files = ".", repo = ".", author, committer, remote, refspec, password, ssh_key, mirror, force, verbose = TRUE )
message |
a commit message |
files |
vector of paths relative to the git root directory. Use "." to stage all changed files. |
repo |
a path to an existing repository, or a git_repository object as returned by git_open, git_init or git_clone. |
author |
A git_signature value, default is git_signature_default. |
committer |
A git_signature value, default is same as author |
remote |
name of a remote listed in git_remote_list() |
refspec |
string with mapping between remote and local refs |
password |
a string or a callback function to get passwords for authentication or password protected ssh keys. Defaults to askpass which checks getOption('askpass'). |
ssh_key |
path or object containing your ssh private key. By default we look for keys in ssh-agent and credentials::ssh_key_info. |
mirror |
use the --mirror flag |
force |
use the --force flag |
verbose |
display some progress info while downloading |
No return value. This function is called for its side effects.
git_update()
git_update()
This function is a wrapper for
git_config_global_set
.
It sets two name/value pairs at
once: name = "user.name"
is set to the value of the name
argument, and name = "user.email"
is set to the value of the
email
argument.
git_user(name, email, overwrite = !has_git_user(), verbose = TRUE)
git_user(name, email, overwrite = !has_git_user(), verbose = TRUE)
name |
Character. The user name you want to use with 'Git'. |
email |
Character. The email address you want to use with 'Git'. |
overwrite |
Logical. Whether or not to overwrite existing 'Git'
credentials. Use this to prevent code from accidentally overwriting existing
'Git' credentials. The default value uses |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
No return value. This function is called for its side effects.
do.call(git_user, worcs:::get_user())
do.call(git_user, worcs:::get_user())
Sets up a GitHub Action to perform continuous integration (CI)
for a WORCS project. CI automatically evaluates check_endpoints()
or
reproduce(check_endpoints = TRUE)
at each push or pull request.
github_action_check_endpoints(worcs_directory = ".") github_action_reproduce(worcs_directory = ".")
github_action_check_endpoints(worcs_directory = ".") github_action_reproduce(worcs_directory = ".")
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
No return value. This function is called for its side effects.
use_github_action
add_endpoint
check_endpoints
Check whether the values user.name
and user.email
exist in the 'Git' global configuration settings.
Uses git_config_global
.
has_git_user()
has_git_user()
Logical, indicating whether 'Git' global configuration settings could
be retrieved, and contained the values
user.name
and user.email
.
has_git_user()
has_git_user()
Scans the WORCS project file for data that have been saved using
open_data
or closed_data
, and loads these data
into the global (working) environment. The function will load the original
data if available on the current system. If only a synthetic dataset is
available, this function loads the synthetic data.
The name of the object containing the data is derived from the file name by
removing the file extension, and, when applicable, the prefix
"synthetic_"
. Thus, both "data.csv"
and
"synthetic_data.csv"
will be loaded into an object called data
.
load_data( worcs_directory = ".", to_envir = TRUE, envir = parent.frame(1), verbose = TRUE, use_metadata = TRUE )
load_data( worcs_directory = ".", to_envir = TRUE, envir = parent.frame(1), verbose = TRUE, use_metadata = TRUE )
worcs_directory |
Character, indicating the WORCS project directory from
which to load data. The default value "." points to the current directory. |
to_envir |
Logical, indicating whether to load objects directly into
the environment, or return a |
envir |
The environment where the data should be loaded. The default
value |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
use_metadata |
Logical. Whether or not to use the codebook and value labels and attempt to coerce the class and values of variables to those recorded therein. Default: TRUE |
Returns a list invisibly. If to_envir = FALSE
, this list
contains the loaded data files. If to_envir = TRUE
, the list is
empty, and the loaded data files are assigned directly into the
environment specified by envir.
test_dir <- file.path(tempdir(), "loaddata") old_wd <- getwd() dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:5, ] suppressWarnings(closed_data(df, codebook = NULL)) load_data() data rm("data") file.remove("data.csv") load_data() data setwd(old_wd) unlink(test_dir, recursive = TRUE)
test_dir <- file.path(tempdir(), "loaddata") old_wd <- getwd() dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:5, ] suppressWarnings(closed_data(df, codebook = NULL)) load_data() data rm("data") file.remove("data.csv") load_data() data setwd(old_wd) unlink(test_dir, recursive = TRUE)
Loads the designated project entry point into the default
editor, using file.edit
.
load_entrypoint(worcs_directory = ".", verbose = TRUE, ...)
load_entrypoint(worcs_directory = ".", verbose = TRUE, ...)
worcs_directory |
Character, indicating the WORCS project directory to
which to save data. The default value "." points to the current directory. |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
... |
Additional arguments passed to |
No return value. This function is called for its side effects.
## Not run: # Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "entrypoint") dir.create(test_dir) setwd(test_dir) # Prepare worcs file and dummy entry point worcs:::write_worcsfile(".worcs", entry_point = "test.txt") writeLines("Hello world", con = file("test.txt", "w")) # Demonstrate load_entrypoint() load_entrypoint() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE) ## End(Not run)
## Not run: # Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "entrypoint") dir.create(test_dir) setwd(test_dir) # Prepare worcs file and dummy entry point worcs:::write_worcsfile(".worcs", entry_point = "test.txt") writeLines("Hello world", con = file("test.txt", "w")) # Demonstrate load_entrypoint() load_entrypoint() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE) ## End(Not run)
Creates a codebook for a dataset in 'R Markdown' format, and
renders it to 'markdown' for 'GitHub'. A codebook contains metadata and
documentation for a data file.
We urge users to customize the automatically generated 'R Markdown'
document and re-knit it, for example, to add a paragraph with details on
the data collection procedures. The variable descriptives are stored in
a .csv
file, which can be edited in 'R' or a spreadsheet program.
Columns can be appended, and we encourage users to complete at least the
following two columns in this file:
Describe the type of variable in this column. For example: "morality".
Provide a plain-text description of the variable. For example, the full text of a questionnaire item: "People should be willing to do anything to help a member of their family".
Re-knitting the 'R Markdown' file (using render
) will
transfer these changes to the 'markdown' file for 'GitHub'.
make_codebook( data, filename = "codebook.Rmd", render_file = TRUE, csv_file = gsub("rmd$", "csv", filename, ignore.case = TRUE), verbose = TRUE )
make_codebook( data, filename = "codebook.Rmd", render_file = TRUE, csv_file = gsub("rmd$", "csv", filename, ignore.case = TRUE), verbose = TRUE )
data |
A data.frame for which to create a codebook. |
filename |
Character. File name to write the codebook |
render_file |
Logical. Whether or not to render the document. |
csv_file |
Character. File name to write the codebook |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
Logical
, indicating whether or not the operation was
successful. This function is mostly called for its side effect of rendering
an 'R Markdown' codebook.
if(rmarkdown::pandoc_available("2.0")){ library(rmarkdown) library(knitr) filename <- tempfile("codebook", fileext = ".Rmd") make_codebook(iris, filename = filename, csv_file = NULL) unlink(c( ".worcs", filename, gsub("\\.Rmd", "\\.md", filename), gsub("\\.Rmd", "\\.html", filename), gsub("\\.Rmd", "_files", filename) ), recursive = TRUE) }
if(rmarkdown::pandoc_available("2.0")){ library(rmarkdown) library(knitr) filename <- tempfile("codebook", fileext = ".Rmd") make_codebook(iris, filename = filename, csv_file = NULL) unlink(c( ".worcs", filename, gsub("\\.Rmd", "\\.md", filename), gsub("\\.Rmd", "\\.html", filename), gsub("\\.Rmd", "_files", filename) ), recursive = TRUE) }
This function prints a notification message when some or all of
the data used in a project are synthetic (see closed_data and
synthetic). See Details for important information.
notify_synthetic(..., msg = NULL)
notify_synthetic(..., msg = NULL)
... |
Objects of class |
msg |
Expression containing the message to print in case not all
|
The preferred way to use this function is to provide specific data
objects in the function call, using the ...
argument.
If no such objects are provided, notify_synthetic
will scan the
parent environment for objects of class worcs_data
.
This function is emphatically designed to be included in an 'R Markdown' file, to dynamically generate a notification message when a third party 'Knits' such a document without having access to all original data.
No return value. This function is called for its side effect of printing a notification message.
closed_data synthetic add_synthetic
df <- iris class(df) <- c("worcs_data", class(df)) attr(df, "type") <- "synthetic" notify_synthetic(df, msg = "synthetic")
df <- iris class(df) <- c("worcs_data", class(df)) attr(df, "type") <- "synthetic" notify_synthetic(df, msg = "synthetic")
This function saves a data.frame as a .csv
file (using
write.csv
), stores a checksum in '.worcs',
and amends the .gitignore
file to exclude filename
.
open_data( data, filename = paste0(deparse(substitute(data)), ".csv"), codebook = paste0("codebook_", deparse(substitute(data)), ".Rmd"), value_labels = paste0("value_labels_", deparse(substitute(data)), ".yml"), worcs_directory = ".", save_expression = write.csv(x = data, file = filename, row.names = FALSE), load_expression = read.csv(file = filename, stringsAsFactors = TRUE), ... )
open_data( data, filename = paste0(deparse(substitute(data)), ".csv"), codebook = paste0("codebook_", deparse(substitute(data)), ".Rmd"), value_labels = paste0("value_labels_", deparse(substitute(data)), ".yml"), worcs_directory = ".", save_expression = write.csv(x = data, file = filename, row.names = FALSE), load_expression = read.csv(file = filename, stringsAsFactors = TRUE), ... )
data |
A data.frame to save. |
filename |
Character, naming the file data should be written to. By
default, constructs a filename from the name of the object passed to
|
codebook |
Character, naming the file the codebook should be written to.
An 'R Markdown' codebook will be created and rendered to
|
value_labels |
Character, naming the file the value labels of factors
and ordinal variables should be written to.
By default, constructs a filename from the name of the object passed to
|
worcs_directory |
Character, indicating the WORCS project directory to
which to save data. The default value "." points to the current directory. Default: '.' |
save_expression |
An R-expression used to save the |
load_expression |
An R-expression used to load the |
... |
Additional arguments passed to and from functions. |
Returns NULL
invisibly. This
function is called for its side effects.
open_data closed_data save_data
test_dir <- file.path(tempdir(), "data") old_wd <- getwd() dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:5, ] open_data(df, codebook = NULL) setwd(old_wd) unlink(test_dir, recursive = TRUE)
test_dir <- file.path(tempdir(), "data") old_wd <- getwd() dir.create(test_dir) setwd(test_dir) worcs:::write_worcsfile(".worcs") df <- iris[1:5, ] open_data(df, codebook = NULL) setwd(old_wd) unlink(test_dir, recursive = TRUE)
Evaluate the recipe contained in a WORCS project to derive its endpoints.
reproduce(worcs_directory = ".", verbose = TRUE, check_endpoints = TRUE, ...)
reproduce(worcs_directory = ".", verbose = TRUE, check_endpoints = TRUE, ...)
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
verbose |
Logical. Whether or not to print status messages to the
console. Default: TRUE |
check_endpoints |
Logical. Whether or not to call |
... |
Additional arguments. |
No return value. This function is called for its side effects.
add_endpoint
snapshot_endpoints
check_endpoints
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "reproduce") dir.create(test_dir) setwd(test_dir) file.create(".worcs") worcs:::add_recipe(recipe = 'writeLines("test", "test.txt")') # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "reproduce") dir.create(test_dir) setwd(test_dir) file.create(".worcs") worcs:::add_recipe(recipe = 'writeLines("test", "test.txt")') # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Calculate skew and kurtosis, standard errors for both, and the estimates divided by two times the standard error. If this latter quantity exceeds an absolute value of 1, the skew/kurtosis is significant. With very large sample sizes, significant skew/kurtosis is common.
skew_kurtosis(x, verbose = FALSE, se = FALSE, ...)
skew_kurtosis(x, verbose = FALSE, se = FALSE, ...)
x |
An object for which a method exists. |
verbose |
Logical. Whether or not to print messages to the console. Default: FALSE |
se |
Whether or not to return the standard errors. Default: FALSE |
... |
Additional arguments to pass to and from functions. |
A matrix
of skew and kurtosis statistics for x
.
skew_kurtosis(datasets::anscombe)
skew_kurtosis(datasets::anscombe)
Update the checksums of all endpoints in a WORCS project.
snapshot_endpoints(worcs_directory = ".", verbose = TRUE, ...)
snapshot_endpoints(worcs_directory = ".", verbose = TRUE, ...)
worcs_directory |
Character, indicating the WORCS project directory to which to save data. The default value "." points to the current directory. Default: '.' |
verbose |
Logical. Whether or not to print status messages to the console. Default: TRUE |
... |
Additional arguments. |
No return value. This function is called for its side effects.
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "update_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") writeLines("second test", "test.txt") snapshot_endpoints() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
# Create directory to run the example old_wd <- getwd() test_dir <- file.path(tempdir(), "update_endpoint") dir.create(test_dir) setwd(test_dir) file.create(".worcs") writeLines("test", "test.txt") add_endpoint("test.txt") writeLines("second test", "test.txt") snapshot_endpoints() # Cleaning example directory setwd(old_wd) unlink(test_dir, recursive = TRUE)
Generates a synthetic version of a data.frame
, with
similar characteristics to the original. See Details for the algorithm used.
synthetic( data, model_expression = ranger(x = x, y = y), predict_expression = predict(model, data = xsynth)$predictions, missingness_expression = NULL, verbose = TRUE )
synthetic( data, model_expression = ranger(x = x, y = y), predict_expression = predict(model, data = xsynth)$predictions, missingness_expression = NULL, verbose = TRUE )
data |
A data.frame of which to make a synthetic version. |
model_expression |
An R-expression to estimate a model. Defaults to
|
predict_expression |
An R-expression to generate predicted values based
on the model estimated by |
missingness_expression |
Optional. An R-expression to impute missing
values. Defaults to |
verbose |
Logical, Default: TRUE. Whether to show a progress bar while running the algorithm and provide informative messages. |
Based on the work by Nowok, Raab, and Dibben (2016), this function uses a simple algorithm to generate a synthetic dataset with similar characteristics to the original. The algorithm is as follows:
Let x be the original data.frame, with columns 1:j
Let xsynth be a synthetic data.frame, with columns 1:j
Column 1 of xsynth is a bootstrapped version of column 1 of x
Using model_expression
, a predictive model is built for column
c, for c along 2:j, with c predicted from columns 1:(c-1) of the original
data.
Using predict_expression
, columns 1:(c-1) of the synthetic data
are used to predict synthetic values for column c.
Variables are thus imputed in order of occurrence in the data.frame
.
To impute in a different order, reorder the data.
Note that, for data synthesis to work properly, it is essential that the
class
of variables is defined correctly. The default algorithm
ranger
supports numeric, integer, and factor types.
Other types of variables should be converted to one of these types, or users
can use a custom model_expression and predict_expression
when calling synthetic.
Note that for data synthesis to work properly, it is essential that the
class
of variables is defined correctly. The default algorithm
ranger
supports numeric, integer, factor, and logical
data. Other types of variables should be converted to one of these types.
Users can provide a custom model_expression and
predict_expression to use a different algorithm when calling
synthetic.
As demonstrated in the example, users could call lm
as a
model_expression
to use
linear regression, which preserves linear marginal relationships but can give
rise to values out of range of the original data.
Or users could call sample
as a predict_expression
to bootstrap
each variable, a very quick solution that maintains univariate distributions
but loses all marginal relationships. These examples are not exhaustive, and
users can even create custom functions.
A data.frame
with synthetic data, based on data
.
Nowok, B., Raab, G. M., & Dibben, C. (2016). synthpop: Bespoke creation of synthetic data in R. Journal of Statistical Software, 74(11), 1-26. doi:10.18637/jss.v074.i11.
## Not run: # Example using the iris dataset and default ranger algorithm iris_syn <- synthetic(iris) # Example using lm as prediction algorithm (only works for numeric variables) # note that, within the model_expression, a new data.frame is created because # lm() requires a separate data argument: dat <- iris[, 1:4] synthetic(dat, model_expression = lm(.outcome ~ ., data = data.frame(.outcome = y, xsynth)), predict_expression = predict(model, newdata = xsynth)) ## End(Not run) # Example using bootstrapping: synthetic(iris, model_expression = NULL, predict_expression = sample(y, size = length(y), replace = TRUE)) ## Not run: # Example with missing data, no imputation iris_missings <- iris for(i in 1:10){ iris_missings[sample.int(nrow(iris_missings), 1, replace = TRUE), sample.int(ncol(iris_missings), 1, replace = TRUE)] <- NA } iris_miss_syn <- synthetic(iris_missings) # Example with missing data, imputation by median/mode substitution # First, define a simple function for median/mode substitution: imp_fun <- function(x){ if(is.data.frame(x)){ return(data.frame(sapply(x, imp_fun))) } else { out <- x if(inherits(x, "numeric")){ out[is.na(out)] <- median(x[!is.na(out)]) } else { out[is.na(out)] <- names(sort(table(out), decreasing = TRUE))[1] } out } } # Then, call synthetic() with this function as missingness_expression: iris_miss_syn <- synthetic(iris_missings, missingness_expression = imp_fun(data)) ## End(Not run)
## Not run: # Example using the iris dataset and default ranger algorithm iris_syn <- synthetic(iris) # Example using lm as prediction algorithm (only works for numeric variables) # note that, within the model_expression, a new data.frame is created because # lm() requires a separate data argument: dat <- iris[, 1:4] synthetic(dat, model_expression = lm(.outcome ~ ., data = data.frame(.outcome = y, xsynth)), predict_expression = predict(model, newdata = xsynth)) ## End(Not run) # Example using bootstrapping: synthetic(iris, model_expression = NULL, predict_expression = sample(y, size = length(y), replace = TRUE)) ## Not run: # Example with missing data, no imputation iris_missings <- iris for(i in 1:10){ iris_missings[sample.int(nrow(iris_missings), 1, replace = TRUE), sample.int(ncol(iris_missings), 1, replace = TRUE)] <- NA } iris_miss_syn <- synthetic(iris_missings) # Example with missing data, imputation by median/mode substitution # First, define a simple function for median/mode substitution: imp_fun <- function(x){ if(is.data.frame(x)){ return(data.frame(sapply(x, imp_fun))) } else { out <- x if(inherits(x, "numeric")){ out[is.na(out)] <- median(x[!is.na(out)]) } else { out[is.na(out)] <- names(sort(table(out), decreasing = TRUE))[1] } out } } # Then, call synthetic() with this function as missingness_expression: iris_miss_syn <- synthetic(iris_missings, missingness_expression = imp_fun(data)) ## End(Not run)
Evaluates whether a project meets the criteria of the WORCS
checklist (see worcs_checklist
), and adds a badge to the
project's README.md
.
worcs_badge( path = ".", update_readme = "README.md", update_csv = "checklist.csv" )
worcs_badge( path = ".", update_readme = "README.md", update_csv = "checklist.csv" )
path |
Character. This can either be the path to a WORCS project folder
(a project with a |
update_readme |
Character. Path to the |
update_csv |
Character. Path to the |
No return value. This function is called for its side effects.
example_dir <- file.path(tempdir(), "badge") dir.create(example_dir) write("a", file.path(example_dir, ".worcs")) worcs_badge(path = example_dir, update_readme = NULL)
example_dir <- file.path(tempdir(), "badge") dir.create(example_dir) write("a", file.path(example_dir, ".worcs")) worcs_badge(path = example_dir, update_readme = NULL)
This checklist can be used to see whether a project adheres to the principles of open reproducible code in science, as set out in the WORCS paper.
data(worcs_checklist)
data(worcs_checklist)
A data frame with 15 rows and 5 variables.
category | factor |
Category of the checklist element. |
name | factor |
Name of the checklist element. |
description | factor |
What are the requirements to claim that this checklist element is met? |
importance | factor |
Whether the checklist element is essential to obtain a green 'open science' badge, or optional. |
check | logical |
Whether the criterion is checked
automatically by worcs_badge.
|
Van Lissa, C. J., Brandmaier, A. M., Brinkman, L., Lamprecht, A., Peikert, A., Struiksma, M. E., & Vreede, B. (2021). doi:10.3233/DS-210031.
Creates a new 'worcs' project. This function is invoked by the 'RStudio' project template manager, but can also be called directly to create a WORCS project through syntax or the console.
worcs_project( path = "worcs_project", manuscript = "APA6", preregistration = "cos_prereg", add_license = "CC_BY_4.0", use_renv = TRUE, use_targets = FALSE, remote_repo = "https", verbose = TRUE, ... )
worcs_project( path = "worcs_project", manuscript = "APA6", preregistration = "cos_prereg", add_license = "CC_BY_4.0", use_renv = TRUE, use_targets = FALSE, remote_repo = "https", verbose = TRUE, ... )
path |
Character, indicating the directory in which to create the 'worcs' project. Default: 'worcs_project'. |
manuscript |
Character, indicating what template to use for the
'R Markdown' manuscript. Default: 'APA6'. Available choices include
|
preregistration |
Character, indicating what template to use for the
preregistration. Default: 'cos_prereg'. Available choices include:
|
add_license |
Character, indicating what license to include.
Default: 'CC_BY_4.0'. Available options include:
|
use_renv |
Logical, indicating whether or not to use 'renv' to make the
project reproducible. Default: TRUE. See |
use_targets |
Logical, indicating whether or not to use 'targets' to
create a Make-like pipeline. Default: FALSE. See |
remote_repo |
Character, address of the remote repository for
this project. This link should have the form
|
verbose |
Logical. Whether or not to print messages to the console during project creation. Default: TRUE |
... |
Additional arguments passed to and from functions. |
No return value. This function is called for its side effects.
the_test <- "worcs_template" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) do.call(git_user, worcs:::get_user()) worcs_project(file.path(tempdir(), the_test, "worcs_project"), manuscript = "github_document", preregistration = "None", add_license = "None", use_renv = FALSE, remote_repo = "https") setwd(old_wd) unlink(file.path(tempdir(), the_test))
the_test <- "worcs_template" old_wd <- getwd() dir.create(file.path(tempdir(), the_test)) do.call(git_user, worcs:::get_user()) worcs_project(file.path(tempdir(), the_test, "worcs_project"), manuscript = "github_document", preregistration = "None", add_license = "None", use_renv = FALSE, remote_repo = "https") setwd(old_wd) unlink(file.path(tempdir(), the_test))