library(openxlsx)
library(dplyr)
# User inputs
# -------------
= # folder with files
dirinput = # output folder to save the files
diroutput
# Files
= # insert filename of table with subject info
file_subjects = # insert name of sheet in table
sheet_subjects
= # filename of table with scanlist
file_scanlist = # insert name of sheet in table
sheet_scanlist
# missing values definition
=c("","N/A") # labels used to specify missing value, first value is always: ""
labels_missing_subjects =c("","N/A")
labels_missing_scanlist
# Define columns of interest (key variables: column cell values should be identical across tables)
= # name of column identifying subject in subject file
sID_subjects = # name of column identifying subject in scan list file
sID_scans
# Read tables
# ---------------
<- openxlsx::read.xlsx(xlsxFile = file.path(dirinput,file_subjects),
tbl_subj sheet= sheet_subjects,
na.strings =labels_missing_subj)
print(paste0('The table: has ' , nrow(tbl_subj), ' rows and ',ncol(tbl_subj), ' columns'))
#
<- openxlsx::read.xlsx(xlsxFile = file.path(dirinput,file_scanlist),
tbl_scans sheet= sheet_scanlist,
na.strings=labels_missing_scanlist)
print(paste0('The table : has ' , nrow(tbl_scans), ' rows and ',ncol(tbl_scans), ' columns'))
# Join tables
# ---------------
<- full_join(x=tbl,
tbl_joined y = tbl_files,
by=join_by(sID_scans==sID_subjects),
keep=FALSE)
print(paste0('The new table has ' , nrow(tbl_joined), ' rows and ',ncol(tbl_joined), ' columns'))
# Save (specify your own output filename)
#-----
::write.xlsx(tbl_joined, file = file.path(diroutput,"table_merged.xlsx") openxlsx
Table Operations
R templates to do frequent table operations
December 16, 2024
Template 1 - Combine two Excels by a shared key variable
This template is based on the following use case. We have one table with subject information with one row per subject and a table with scan lists which has many more rows as subjects have multiple scans. Both tables have a variable identifying the subjects. The subject IDs are common to both tables. We want to combine them and have the list of scans with the subject information in the same table, this means the rows with the subject information will appear multiple times, one for each scan of a given subject. For this we use the function dplyr::full_join()
(Note: the libraries loaded with library()
are additional R packages that need to be installed with install.packages()
before first use)
Note. The code in this template is deliberately explicit for the sake of clarity, but it could be made more compact.