Skip to contents
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(RPamisc)
library(ggblanket)
library(glue)
library(gt)
library(purrr)
library(ggplot2)
library(reactable)
library(patchwork)
library(palmerpenguins)

set_blanket()

demo_data_raw <- 
    penguins |>
    filter(!is.na(flipper_length_mm)) |>
    mutate(
        short_bill_flag = if_else(bill_length_mm < 45, 1, 0),
        long_bill_flag = if_else(bill_length_mm >= 45, 1, 0),
        short_flipper_flag = if_else(flipper_length_mm <= 209, 1, 0),
        long_flipper_flag = if_else(flipper_length_mm > 209, 1, 0)
        )

demo_data_summary <- 
    demo_data_raw |>
    summarise(
        #.by = c(species),
        n = n(), 
        short_bill = sum(short_bill_flag, na.rm = TRUE),
        long_bill = sum(long_bill_flag, na.rm = TRUE),
        short_flipper = sum(short_flipper_flag, na.rm = TRUE),
        long_flipper = sum(long_flipper_flag, na.rm = TRUE)
    ) |>
    mutate(
        bill_rate = round(short_bill/n, 2),
        flipper_rate = round(short_flipper/n, 2)
        #rate = round(long_flipper/n, 2)
    ) 
     
demo_aggregate <- function(.data) {
    
results <- 
    .data |>
    summarise(
        #.by = c(species),
        n = n(), 
        short_bill = sum(short_bill_flag, na.rm = TRUE),
        long_bill = sum(long_bill_flag, na.rm = TRUE),
        short_flipper = sum(short_flipper_flag, na.rm = TRUE),
        long_flipper = sum(long_flipper_flag, na.rm = TRUE)
    ) %>%
    mutate(
        bill_rate = round(short_bill/n, 2),
        flipper_rate = round(short_flipper/n, 2)
        #rate = round(long_flipper/n, 2)
    ) %>%
    arrange(desc(bill_rate), n)

results

}

methods_table <- tribble(
    ~method_name, ~sample_size, ~population,  ~data_source,  
    "Method 1", 15, "All encounters", "OSP PBI",
    "Method 2", 30, "Primary Care", "VA REDCap (manual chart reviews)",
    "Method 3", 50, "All encounters", "OSP PBI",
    "Method 4", 300, "Primary Care", "OSP PBI",
)

knitr::kable(methods_table)
method_name sample_size population data_source
Method 1 15 All encounters OSP PBI
Method 2 30 Primary Care VA REDCap (manual chart reviews)
Method 3 50 All encounters OSP PBI
Method 4 300 Primary Care OSP PBI
demo_data_raw |>  
    gg_histogram(
        x = flipper_length_mm,
        #x = bill_length_mm,
        col = species,
        #facet2 = island, 
        facet = species
        )
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

set.seed(911)

month <- seq(as.Date("2024-01-01"), as.Date("2024-07-01"), "month")

demo_sample <- function(.data, month, size = 15) {

#    https://stackoverflow.com/questions/50977337/calling-prop-test-function-in-r-with-dplyr    
    .data |>
        slice_sample(n = size) |>
        demo_aggregate() |>
        mutate(month = month, .before = 1) |>
        mutate(rate = map2(short_flipper, n, ~ prop.test(.x, .y, conf.level=0.95) %>%
                                     broom::tidy())) %>%
        tidyr::unnest(rate) %>% 
        select(
            month, 
            n, 
            short_flipper, 
            everything()
        )
    
}
# method 1
# sample of 15/month
method_1_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 15)) |>
    bind_rows()

set_blanket()

trend_plot <- function(.data, subtitle = "Method 1: sample 15") {
    
    .data |> 
       gg_pointrange(
            #subtitle = subtitle,
            x = month,  
            y = estimate,
            ymin = conf.low,
            ymax = conf.high,
            x_label = "",
            y_label = "",
            #y_label = "% Short Flipper", 
            y_labels = scales::percent,
            x_breaks = c(as.Date("2024-01-01"), as.Date("2024-07-01")),
            y_breaks = c(.2, .4, .6, .8, 1),
            y_expand_limits = c(.40, 1),
            #linewidth = 1, 
            ) +
        geom_line() +
        geom_hline(yintercept = .9, linetype = 'dashed') 
    
}


p1 <- 
    method_1_data |>
    trend_plot()
    
p1


# method 2
# sample of 30/month
method_2_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 30)) |>
    bind_rows()

p2 <- 
    method_2_data |>
    trend_plot(subtitle = "Method 2: sample 30")

#p2 

# method 3
# sample of 0/month
method_3_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 50)) |>
    bind_rows()

p3 <- 
    method_3_data |>
    trend_plot(subtitle = "Method 3: sample 50")

#p3
# method 4
# sample of 300/month
method_4_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 300)) |>
    bind_rows()

p4 <- 
    method_4_data |>
    trend_plot(subtitle = "Method 4: sample 300")

#p3

p1 + p2 + p3 + p4 + 
     plot_layout(
         2, 2, 
         axes = "collect", axis_titles = "collect") + 
    plot_annotation(
        title = "Comparison of sampling methods on monthly trend lines",
        subtitle = glue("Underlying population rate: {scales::percent(demo_data_summary$flipper_rate)}"),
        tag_levels = "1", 
        tag_prefix = "Method ")

results_reactable <- function(.data) {
    
    .data |>
    select(-c(contains("bill"), long_flipper, estimate:parameter, method, alternative)) |> 
    reactable(
        columns = list(
            conf.low = colDef(format = colFormat(digits = 3)),
            conf.high = colDef(format = colFormat(digits = 3))
        )
    )
}
  
results_table <- function(
        .data, 
        #title = "Method 1", 
        #description = "this is the description", 
        #data_source = "OSP PBI") {
        method_table) { 
    .data |>
    select(-c(contains("bill"), long_flipper, estimate:parameter, method, alternative)) |> 
    gt() |> 
    tab_header(
        title = method_table$method_name, 
        subtitle = glue("Monthly sample of {method_table$sample_size} cases from {method_table$population}")
    ) |>
    tab_footnote(footnote = md(glue("Data source: {method_table$data_source}"))) |>
    fmt_percent(columns = c("flipper_rate", "conf.low", "conf.high"), decimals = 0) |>
    fmt_date(columns = c("month"), date_style = "yMMM") |>
    cols_merge_n_pct("short_flipper", "flipper_rate") |>
    cols_merge(columns = c("conf.low", "conf.high"), pattern = "[{1}, {2}]") |>
    cols_label(
        conf.low = "95% CI", 
        short_flipper = "Number Met (%)"
    ) |>
    cols_align(
        align = "center",
        columns = c("short_flipper", "conf.low")
    )
} 
    

method_1_data |> 
   results_table(methods_table[1, ])
Method 1
Monthly sample of 15 cases from All encounters
month n Number Met (%) 95% CI
Jan 2024 15 10 (67%) [39%, 87%]
Feb 2024 15 10 (67%) [39%, 87%]
Mar 2024 15 10 (67%) [39%, 87%]
Apr 2024 15 12 (80%) [51%, 95%]
May 2024 15 9 (60%) [33%, 83%]
Jun 2024 15 10 (67%) [39%, 87%]
Jul 2024 15 10 (67%) [39%, 87%]
Data source: OSP PBI

method_2_data |> 
    results_table(methods_table[2, ])
Method 2
Monthly sample of 30 cases from Primary Care
month n Number Met (%) 95% CI
Jan 2024 30 22 (73%) [54%, 87%]
Feb 2024 30 20 (67%) [47%, 82%]
Mar 2024 30 17 (57%) [38%, 74%]
Apr 2024 30 21 (70%) [50%, 85%]
May 2024 30 19 (63%) [44%, 79%]
Jun 2024 30 19 (63%) [44%, 79%]
Jul 2024 30 18 (60%) [41%, 77%]
Data source: VA REDCap (manual chart reviews)

method_3_data |> 
    results_table(methods_table[3, ])
Method 3
Monthly sample of 50 cases from All encounters
month n Number Met (%) 95% CI
Jan 2024 50 33 (66%) [51%, 78%]
Feb 2024 50 41 (82%) [68%, 91%]
Mar 2024 50 29 (58%) [43%, 72%]
Apr 2024 50 36 (72%) [57%, 83%]
May 2024 50 31 (62%) [47%, 75%]
Jun 2024 50 30 (60%) [45%, 73%]
Jul 2024 50 30 (60%) [45%, 73%]
Data source: OSP PBI

method_4_data |> 
    results_table(methods_table[4, ])
Method 4
Monthly sample of 300 cases from Primary Care
month n Number Met (%) 95% CI
Jan 2024 300 200 (67%) [61%, 72%]
Feb 2024 300 201 (67%) [61%, 72%]
Mar 2024 300 199 (66%) [61%, 72%]
Apr 2024 300 198 (66%) [60%, 71%]
May 2024 300 199 (66%) [61%, 72%]
Jun 2024 300 199 (66%) [61%, 72%]
Jul 2024 300 197 (66%) [60%, 71%]
Data source: OSP PBI