02 - initialize your project
Let’s create a new R script for this project. In RStudio, select File > New File > R Script and save it to your new project directory.
First we need to load the MitoPilot package.
# ONLY NEEDED FOR HYDRA CLUSTER
# Rebuild PATH so that two extra directories come first:
#   ~/bin                              (contains nextflow exe)
#   /share/apps/tools/java/21.0.2/bin  (java 21.0.2, required for nextflow)
# followed by the fixed list of search directories below.
default_path <- paste(
  c(
    "/cm/shared/apps/uge/8.8.1/bin/lx-amd64",
    "/usr/local/sbin",
    "/usr/local/bin",
    "/usr/sbin",
    "/usr/bin",
    "/usr/lib/rstudio-server/bin/quarto/bin",
    "/usr/lib/rstudio-server/bin/postback"
  ),
  collapse = ":"
)
new_path <- paste(
  c(
    paste0(Sys.getenv("HOME"), "/bin"),
    "/share/apps/tools/java/21.0.2/bin"
  ),
  collapse = ":"
)
# the extra directories go in front so they take precedence during lookup
Sys.setenv(PATH = paste(c(new_path, default_path), collapse = ":"))
# load MitoPilot package
library(MitoPilot)
Next we need to specify a number of parameters for the project.
# your user ID on the cluster
# swap <<USER>> for your user ID -- this is the only place it has to be changed,
# every project path below is derived from it
user_id <- "<<USER>>"
# workshop project directory that holds the run, the mapping file and the reads
project_dir <- file.path(
  "/pool/public/genomics", user_id, "MitoPilot_workshop", "my_project"
)
# directory where your test project will be created
# if the directory does not exist, MitoPilot will create it
wd <- file.path(project_dir, "run_01")
# full path to your sample mapping CSV file
map_file <- file.path(project_dir, "map_file.csv")
# full path to your sequence data directory
seq_data <- file.path(project_dir, "raw_data")
# genetic code for your samples
# see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
# for corals, the appropriate translation table is #4
genetic_code <- 4
# minimum number of reads required to process a sample
# default is 2,000,000
# here we are setting a very low threshold because the example reads have
# already been filtered extensively
min_reads <- 10000
# since this is a coral dataset, we need to use custom GetOrganelle reference
# databases
# for more on custom assembly databases, see:
# https://smithsonianworkshops.github.io/MitoPilot_workshop_2025/qmd/advanced/02_customAsmbDB.html
# both databases live in the same reference directory
ref_db_dir <- "/scratch/nmnh_ocean_dna/GetOrganelle_refDBs/Octocorallia/2025_07_03/refs_final"
label_db <- file.path(ref_db_dir, "labelDB.fasta")
seed_db <- file.path(ref_db_dir, "seedDB.fasta")
# specify an execution environment, "local", "NMNH_Hydra", or "NOAA_SEDNA"
ex <- "NMNH_Hydra"
Now we can initialize the project using those parameters.
# initialize the test project
# every argument is one of the parameter variables defined earlier in this script
MitoPilot::new_project(
# directory where the project will be created
path = wd,
# execution environment: "local", "NMNH_Hydra", or "NOAA_SEDNA"
executor = ex,
# full path to the sample mapping CSV file
mapping_fn = map_file,
# NCBI translation table number for the samples
genetic_code = genetic_code,
# minimum number of reads required to process a sample
min_depth = min_reads,
# full path to the sequence data directory
data_path = seq_data,
# custom GetOrganelle reference databases (seed and label FASTA files)
custom_seeds_db = seed_db,
custom_labels_db = label_db,
# NOTE(review): presumably skips creating an RStudio project file -- confirm in ?new_project
Rproj = FALSE
)
Now we’re ready to launch the GUI and start processing our samples!
# the function to launch the GUI
# must be called from within your project directory,
# so first switch the R session's working directory to the project directory
# note: setwd() changes the working directory for the rest of this R session
setwd(wd)
# launch the MitoPilot GUI
MitoPilot::MitoPilot()