The first function process_ddPCR_data to read your
amplitude (.csv) file exported from QX Manager
# Load dataset for each file
process_ddPCR_data <- function(file_path, channel_num1 = 1, channel_num2 = 2) {
library(dplyr)
# Read the dataset
examplesheet <- read.table(sep=",", file_path, skip = 3, header = TRUE)
# Construct column names based on input numbers
ch1 <- paste0("Ch", channel_num1, "Amplitude")
ch2 <- paste0("Ch", channel_num2, "Amplitude")
x1 <- paste0("X", channel_num1)
x2 <- paste0("X", channel_num2)
# Select only the specified amplitude channels and their corresponding classification columns
processed_data <- examplesheet %>%
select(all_of(c(ch1, ch2, x1, x2))) %>%
mutate(Population = case_when(
X1 == 0 & X2 == 0 ~ "Double neg",
X1 == 1 & X2 == 1 ~ "Double pos",
X1 == 1 ~ "Ch1 pos",
X2 == 1 ~ "Ch2 pos"))
return(processed_data)
}
# Example usage:
# processed_data <- process_ddPCR_data("6-Color_SingleTargetPerChannel_Example_D01_Amplitude.csv", 1, 2)
The second function apply_thresholds to modify the default
(at the time of export) thresholds and plot the different droplet
clusters
apply_thresholds <- function(processed_data, ch1_threshold = NULL, ch2_threshold = NULL,
ch1_col = "Ch1Amplitude", ch2_col = "Ch2Amplitude") {
library(ggplot2)
# Copy original Population to CustomPopulation
processed_data <- processed_data %>%
mutate(CustomPopulation = Population)
# If no manual threshold given, calculate defaults
if (is.null(ch1_threshold)) {
ch1_threshold <- processed_data %>%
filter(Population == "Ch1 pos" | Population == "Double pos") %>%
summarise(min_amp = min(.data[[ch1_col]], na.rm = TRUE)) %>%
pull(min_amp) - 1
}
if (is.null(ch2_threshold)) {
ch2_threshold <- processed_data %>%
filter(Population == "Ch2 pos" | Population == "Double pos") %>%
summarise(min_amp = min(.data[[ch2_col]], na.rm = TRUE)) %>%
pull(min_amp) - 1
}
# Improved thresholding (partition assignment) logic
processed_data <- processed_data %>%
mutate(CustomPopulation = case_when(
.data[[ch1_col]] >= ch1_threshold & .data[[ch2_col]] >= ch2_threshold ~ "Double pos",
.data[[ch1_col]] >= ch1_threshold & .data[[ch2_col]] < ch2_threshold ~ "Ch1 pos",
.data[[ch1_col]] < ch1_threshold & .data[[ch2_col]] >= ch2_threshold ~ "Ch2 pos",
.data[[ch1_col]] < ch1_threshold & .data[[ch2_col]] < ch2_threshold ~ "Double neg",
TRUE ~ "Undefined"
))
# Simple density scatterplot
p<-ggplot(processed_data, aes(x = Ch2Amplitude, y = Ch1Amplitude, colour = CustomPopulation))+
geom_point()+
scale_colour_manual(values = c("Ch1 pos" = "#000666", "Ch2 pos" = "#006600",
"Double pos"="#FF6600", "Double neg" ="#333333"))+
geom_hex(alpha=0.3)+
scale_fill_gradient(low="white", high="red")+
geom_hline(yintercept = ch1_threshold, linetype = "dashed", color = "black") + # Horizontal threshold
geom_vline(xintercept = ch2_threshold, linetype = "dashed", color = "black") + # Vertical threshold
labs(colour = "Droplet population") +
theme_minimal()
# Return both processed data and threshold values
return(list(
processed_data_thr = processed_data,
ch1_threshold = ch1_threshold,
ch2_threshold = ch2_threshold,
p = p
))
}
# Example usage:
## processed_data <- process_ddPCR_data("yourfile.csv", 1, 2)
# With default thresholds:
## data_with_default <- apply_thresholds(processed_data)
# OR with manual thresholds:
## data_with_custom <- apply_thresholds(processed_data, ch1_threshold = 3000, ch2_threshold = 4000)
# Extract data from the returned list
## data_table <- data_with_custom$processed_data_thr
## ch1_line <- data_with_custom$ch1_threshold
## ch2_line <- data_with_custom$ch2_threshold
## Plot
## Plot <- data_with_default$p
## p
The output of the second function could look something like
this:
result <- apply_thresholds(processed_data, ch1_threshold = 2000, ch2_threshold = 2500)
print(result[[4]])

The third function simulate_droplet_clusters to compare
your experimental clusters with simulated (perfectly distributed)
clusters
simulate_droplet_clusters <- function(processed_data, population_col = "Population") {
library(dplyr)
library(ggplot2)
library(patchwork)
# Helper to pull population (either Population or CustomPopulation - you choose) data
pop_data <- processed_data %>% rename(PopulationUse = !!population_col)
### --- Ch1 pos cluster (e.g., FAM) ---
ch1_cluster <- pop_data %>% filter(PopulationUse == "Ch1 pos")
mean_ch1 <- mean(ch1_cluster$Ch1Amplitude, na.rm = TRUE)
sd_ch1 <- sd(ch1_cluster$Ch1Amplitude, na.rm = TRUE)
mean_ch2_in_ch1 <- mean(ch1_cluster$Ch2Amplitude, na.rm = TRUE)
sd_ch2_in_ch1 <- sd(ch1_cluster$Ch2Amplitude, na.rm = TRUE)
n_ch1 <- nrow(ch1_cluster)
# Simulated Ch1 pos data
sim_ch1 <- data.frame(
Ch1 = rnorm(n_ch1, mean = mean_ch1, sd = sd_ch1),
Ch2 = rnorm(n_ch1, mean = mean_ch2_in_ch1, sd = sd_ch2_in_ch1),
Type = "simulated"
)
# Experimental Ch1 pos data
exp_ch1 <- ch1_cluster %>%
select(Ch1 = Ch1Amplitude, Ch2 = Ch2Amplitude) %>%
mutate(Type = "experimental")
data_Ch1_pos_both <- bind_rows(sim_ch1, exp_ch1)
### Ch2 pos cluster (e.g., HEX)
ch2_cluster <- pop_data %>% filter(PopulationUse == "Ch2 pos")
mean_ch2 <- mean(ch2_cluster$Ch2Amplitude, na.rm = TRUE)
sd_ch2 <- sd(ch2_cluster$Ch2Amplitude, na.rm = TRUE)
mean_ch1_in_ch2 <- mean(ch2_cluster$Ch1Amplitude, na.rm = TRUE)
sd_ch1_in_ch2 <- sd(ch2_cluster$Ch1Amplitude, na.rm = TRUE)
n_ch2 <- nrow(ch2_cluster)
# Simulated Ch2 pos data
sim_ch2 <- data.frame(
Ch1 = rnorm(n_ch2, mean = mean_ch1_in_ch2, sd = sd_ch1_in_ch2),
Ch2 = rnorm(n_ch2, mean = mean_ch2, sd = sd_ch2),
Type = "simulated"
)
# Experimental Ch2 pos data
exp_ch2 <- ch2_cluster %>%
select(Ch1 = Ch1Amplitude, Ch2 = Ch2Amplitude) %>%
mutate(Type = "experimental")
data_Ch2_pos_both <- bind_rows(sim_ch2, exp_ch2)
# Plotting direct comparisons
# Extract the combined experimental + simulated clusters
data_FAM_pos_both <- data_Ch1_pos_both
data_HEX_pos_both <- data_Ch2_pos_both
# FAM (Ch1 pos) plot
fam_sim_exp <- ggplot(data_Ch1_pos_both, aes(x = Ch2, y = Ch1)) +
geom_point(color = "#000666", alpha = 0.6,inherit.aes = TRUE) +
facet_grid(~Type) +
theme_minimal() +
labs(
title = "Ch1-positive Droplets (FAM)",
x = "Ch2 Amplitude",
y = "Ch1 Amplitude"
)
# HEX (Ch2 pos) plot
hex_sim_exp <- ggplot(data_Ch2_pos_both, aes(x = Ch2, y = Ch1)) +
geom_point(color = "#006600", alpha = 0.6,inherit.aes = TRUE) +
facet_grid(~Type) +
theme_minimal() +
labs(
title = "Ch2-positive Droplets (HEX)",
x = "Ch2 Amplitude",
y = "Ch1 Amplitude"
)
# Combine using patchwork
combined_plot <- fam_sim_exp / hex_sim_exp +
plot_annotation(title = "Simulated vs Experimental Droplet Clusters")
return(list(
Ch1_cluster = data_Ch1_pos_both,
Ch2_cluster = data_Ch2_pos_both,
combined_plot = combined_plot
))
}
# Example usage
## clusters <- simulate_droplet_clusters(data_with_custom, population_col = "CustomPopulation")
## OR
## clusters <- simulate_droplet_clusters(processed_data)
## ch1_cluster_data <- clusters$Ch1_cluster
## ch2_cluster_data <- clusters$Ch2_cluster
## combined_plot <- clusters$combined_plot
## combined_plot
An example of cluster comparison (experimental vs simulated) would
look something like this:
simulated_clusters<-simulate_droplet_clusters(processed_data)
print(simulated_clusters[[3]])

The fourth and final function simulate_ddPCR_counts_plot
plots the theoretical copies per each droplet (0, 1, >1…) together
with 95% conf. intervals using experimentally derived CPD
simulate_ddPCR_counts_plot <- function(processed_data, population_col = "Population", num_simulations = 100) {
library(dplyr)
library(ggplot2)
library(tidyr)
# Dynamically extract correct population column
pop_data <- processed_data %>% rename(PopulationUse = !!population_col)
# Total droplets
total_droplets <- nrow(pop_data)
# Positive droplets for each channel
positive_droplets_Ch1 <- nrow(filter(pop_data, PopulationUse %in% c("Ch1 pos", "Double pos")))
positive_droplets_Ch2 <- nrow(filter(pop_data, PopulationUse %in% c("Ch2 pos", "Double pos")))
# Proportions and Poisson lambda estimates
lambda_estimated_Ch1 <- -log(1 - positive_droplets_Ch1 / total_droplets)
lambda_estimated_Ch2 <- -log(1 - positive_droplets_Ch2 / total_droplets)
# Run simulations
simulated_data <- lapply(1:num_simulations, function(i) {
x_vals <- rpois(total_droplets, lambda = lambda_estimated_Ch1)
y_vals <- rpois(total_droplets, lambda = lambda_estimated_Ch2)
data.frame(x = x_vals, y = "Ch1", sim = i) %>%
bind_rows(data.frame(x = y_vals, y = "Ch2", sim = i))
}) %>% bind_rows()
# Count occurrences
simulated_counts <- simulated_data %>%
group_by(y, sim, x) %>%
summarise(count = n(), .groups = "drop")
# Compute means and 95% CI
summary_data <- simulated_counts %>%
group_by(y, x) %>%
summarise(
mean_count = mean(count),
sd_count = ifelse(n() > 1, sd(count), 0),
.groups = "drop"
) %>%
mutate(
ci_lower = mean_count - 1.96 * (sd_count / sqrt(num_simulations)),
ci_upper = mean_count + 1.96 * (sd_count / sqrt(num_simulations))
) %>%
replace_na(list(ci_lower = 0, ci_upper = 0))
# Plot
plot <- ggplot(summary_data, aes(x = x, y = mean_count, fill = y)) +
geom_col(alpha = 0.6, position = "dodge") +
geom_errorbar(aes(ymin = pmax(ci_lower, 0), ymax = ci_upper),
width = 0.2, position = position_dodge(0.9)) +
geom_text(aes(label = round(mean_count, 1)),
vjust = -0.5, position = position_dodge(0.9)) +
labs(title = paste0("Simulated Frequencies of Droplets with 0, 1 and >1 target copies (n = ", num_simulations, ")"),
x = "Number of DNA molecules per droplet",
y = "Mean Frequency (± 95% CI)") +
scale_fill_manual(values = c("Ch1" = "#000666", "Ch2" = "#006600")) +
facet_wrap(~y) +
theme_minimal() +
theme(legend.position = "none")
return(plot)
}
# Example usage 1
## plot <- simulate_ddPCR_counts_plot(processed_data = processed_data, population_col = "Population")
# plot
# Example usage 2
## plot_custom <- simulate_ddPCR_counts_plot(processed_data = data_with_custom$processed_data_thr, population_col = "CustomPopulation")
## plot_custom
The output of the fourth function would be a Poisson distribution of
the # of target copies (0, 1, 2…) plotted against their frequency
(i.e. number of droplets). By default the number of simulations is 100
and the means and confidence intervals are plotted, for both
channels.
simulate_ddPCR_counts_plot(processed_data)
