02 - initialize your project

Let’s create a new R script for this project. In RStudio, select File > New File > R Script and save it to your new project directory.

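First we need to load the MitoPilot package. If you are working on the Hydra cluster, the snippet below also prepends the Nextflow and Java directories to the PATH before the package is loaded.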

# HYDRA CLUSTER ONLY -- skip the PATH setup on other systems
# two extra directories are placed at the front of PATH:
#   ~/bin (contains the nextflow executable)
#   the java 21.0.2 bin directory (java is required for nextflow)
default_path <- paste(
  c(
    "/cm/shared/apps/uge/8.8.1/bin/lx-amd64",
    "/usr/local/sbin",
    "/usr/local/bin",
    "/usr/sbin",
    "/usr/bin",
    "/usr/lib/rstudio-server/bin/quarto/bin",
    "/usr/lib/rstudio-server/bin/postback"
  ),
  collapse = ":"
)
new_path <- paste(
  c(
    paste0(Sys.getenv("HOME"), "/bin"),
    "/share/apps/tools/java/21.0.2/bin"
  ),
  collapse = ":"
)
# the new entries go first so they are searched before the default ones
Sys.setenv(PATH = paste0(new_path, ":", default_path))

# attach the MitoPilot package
library(MitoPilot)

Next we need to specify a number of parameters for the project.

# root directory of your workshop project; the three paths below are built from it
# swap <<USER>> for your user ID
project_dir <- "/pool/public/genomics/<<USER>>/MitoPilot_workshop/my_project"

# directory where your test project will be created
# if the directory does not exist, MitoPilot will create it
wd <- file.path(project_dir, "run_01")

# full path to your sample mapping CSV file
map_file <- file.path(project_dir, "map_file.csv")

# full path to your sequence data directory
seq_data <- file.path(project_dir, "raw_data")

# genetic code (NCBI translation table number) for your samples
# see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
# for corals, the appropriate translation table is #4
genetic_code <- 4

# minimum number of reads required to process a sample
# default is 2,000,000
# the threshold is set very low here because the example reads
# have already been filtered extensively
min_reads <- 10000

# this is a coral dataset, so custom GetOrganelle reference databases are needed
# for more on custom assembly databases, see:
# https://smithsonianworkshops.github.io/MitoPilot_workshop_2025/qmd/advanced/02_customAsmbDB.html
ref_db_dir <- "/scratch/nmnh_ocean_dna/GetOrganelle_refDBs/Octocorallia/2025_07_03/refs_final"
label_db <- file.path(ref_db_dir, "labelDB.fasta")
seed_db <- file.path(ref_db_dir, "seedDB.fasta")

# execution environment: one of "local", "NMNH_Hydra", or "NOAA_SEDNA"
ex <- "NMNH_Hydra"

Now we can initialize the project using those parameters.

# create the test project from the parameters defined above
# arguments are grouped as: location and inputs, processing settings,
# execution environment, custom reference databases
# NOTE(review): Rproj = FALSE presumably skips creating an RStudio project
# file -- confirm against the MitoPilot documentation
MitoPilot::new_project(
  path = wd,
  mapping_fn = map_file,
  data_path = seq_data,
  min_depth = min_reads,
  genetic_code = genetic_code,
  executor = ex,
  custom_seeds_db = seed_db,
  custom_labels_db = label_db,
  Rproj = FALSE
)

Now we’re ready to launch the GUI and start processing our samples!

# launch the MitoPilot GUI
# the launcher must be called from within your project directory,
# so the working directory is switched to `wd` first
setwd(wd)
MitoPilot::MitoPilot()