sandbox

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(RPamisc)
library(ggblanket)
library(glue)
library(gt)
library(purrr)
library(ggplot2)
library(reactable)
library(patchwork)
library(palmerpenguins)

set_blanket()

demo_data_raw <- 
    penguins |>
    filter(!is.na(flipper_length_mm)) |>
    mutate(
        short_bill_flag = if_else(bill_length_mm < 45, 1, 0),
        long_bill_flag = if_else(bill_length_mm >= 45, 1, 0),
        short_flipper_flag = if_else(flipper_length_mm <= 209, 1, 0),
        long_flipper_flag = if_else(flipper_length_mm > 209, 1, 0)
        )

demo_data_summary <- 
    demo_data_raw |>
    summarise(
        #.by = c(species),
        n = n(), 
        short_bill = sum(short_bill_flag, na.rm = TRUE),
        long_bill = sum(long_bill_flag, na.rm = TRUE),
        short_flipper = sum(short_flipper_flag, na.rm = TRUE),
        long_flipper = sum(long_flipper_flag, na.rm = TRUE)
    ) |>
    mutate(
        bill_rate = round(short_bill/n, 2),
        flipper_rate = round(short_flipper/n, 2)
        #rate = round(long_flipper/n, 2)
    ) 
     
demo_aggregate <- function(.data) {
    
results <- 
    .data |>
    summarise(
        #.by = c(species),
        n = n(), 
        short_bill = sum(short_bill_flag, na.rm = TRUE),
        long_bill = sum(long_bill_flag, na.rm = TRUE),
        short_flipper = sum(short_flipper_flag, na.rm = TRUE),
        long_flipper = sum(long_flipper_flag, na.rm = TRUE)
    ) %>%
    mutate(
        bill_rate = round(short_bill/n, 2),
        flipper_rate = round(short_flipper/n, 2)
        #rate = round(long_flipper/n, 2)
    ) %>%
    arrange(desc(bill_rate), n)

results

}

methods_table <- tribble(
    ~method_name, ~sample_size, ~population,  ~data_source,  
    "Method 1", 15, "All encounters", "OSP PBI",
    "Method 2", 30, "Primary Care", "VA REDCap (manual chart reviews)",
    "Method 3", 50, "All encounters", "OSP PBI",
    "Method 4", 300, "Primary Care", "OSP PBI",
)

knitr::kable(methods_table)

method_name	sample_size	population	data_source
Method 1	15	All encounters	OSP PBI
Method 2	30	Primary Care	VA REDCap (manual chart reviews)
Method 3	50	All encounters	OSP PBI
Method 4	300	Primary Care	OSP PBI

demo_data_raw |>  
    gg_histogram(
        x = flipper_length_mm,
        #x = bill_length_mm,
        col = species,
        #facet2 = island, 
        facet = species
        )
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

set.seed(911)

month <- seq(as.Date("2024-01-01"), as.Date("2024-07-01"), "month")

demo_sample <- function(.data, month, size = 15) {

#    https://stackoverflow.com/questions/50977337/calling-prop-test-function-in-r-with-dplyr    
    .data |>
        slice_sample(n = size) |>
        demo_aggregate() |>
        mutate(month = month, .before = 1) |>
        mutate(rate = map2(short_flipper, n, ~ prop.test(.x, .y, conf.level=0.95) %>%
                                     broom::tidy())) %>%
        tidyr::unnest(rate) %>% 
        select(
            month, 
            n, 
            short_flipper, 
            everything()
        )
    
}

# method 1
# sample of 15/month
method_1_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 15)) |>
    bind_rows()

set_blanket()

trend_plot <- function(.data, subtitle = "Method 1: sample 15") {
    
    .data |> 
       gg_pointrange(
            #subtitle = subtitle,
            x = month,  
            y = estimate,
            ymin = conf.low,
            ymax = conf.high,
            x_label = "",
            y_label = "",
            #y_label = "% Short Flipper", 
            y_labels = scales::percent,
            x_breaks = c(as.Date("2024-01-01"), as.Date("2024-07-01")),
            y_breaks = c(.2, .4, .6, .8, 1),
            y_expand_limits = c(.40, 1),
            #linewidth = 1, 
            ) +
        geom_line() +
        geom_hline(yintercept = .9, linetype = 'dashed') 
    
}


p1 <- 
    method_1_data |>
    trend_plot()
    
p1


# method 2
# sample of 30/month
method_2_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 30)) |>
    bind_rows()

p2 <- 
    method_2_data |>
    trend_plot(subtitle = "Method 2: sample 30")

#p2


# method 3
# sample of 0/month
method_3_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 50)) |>
    bind_rows()

p3 <- 
    method_3_data |>
    trend_plot(subtitle = "Method 3: sample 50")

#p3

# method 4
# sample of 300/month
method_4_data <-
    month |>
    map(~ demo_sample(demo_data_raw, .x, size = 300)) |>
    bind_rows()

p4 <- 
    method_4_data |>
    trend_plot(subtitle = "Method 4: sample 300")

#p3


p1 + p2 + p3 + p4 + 
     plot_layout(
         2, 2, 
         axes = "collect", axis_titles = "collect") + 
    plot_annotation(
        title = "Comparison of sampling methods on monthly trend lines",
        subtitle = glue("Underlying population rate: {scales::percent(demo_data_summary$flipper_rate)}"),
        tag_levels = "1", 
        tag_prefix = "Method ")

results_reactable <- function(.data) {
    
    .data |>
    select(-c(contains("bill"), long_flipper, estimate:parameter, method, alternative)) |> 
    reactable(
        columns = list(
            conf.low = colDef(format = colFormat(digits = 3)),
            conf.high = colDef(format = colFormat(digits = 3))
        )
    )
}
  
results_table <- function(
        .data, 
        #title = "Method 1", 
        #description = "this is the description", 
        #data_source = "OSP PBI") {
        method_table) { 
    .data |>
    select(-c(contains("bill"), long_flipper, estimate:parameter, method, alternative)) |> 
    gt() |> 
    tab_header(
        title = method_table$method_name, 
        subtitle = glue("Monthly sample of {method_table$sample_size} cases from {method_table$population}")
    ) |>
    tab_footnote(footnote = md(glue("Data source: {method_table$data_source}"))) |>
    fmt_percent(columns = c("flipper_rate", "conf.low", "conf.high"), decimals = 0) |>
    fmt_date(columns = c("month"), date_style = "yMMM") |>
    cols_merge_n_pct("short_flipper", "flipper_rate") |>
    cols_merge(columns = c("conf.low", "conf.high"), pattern = "[{1}, {2}]") |>
    cols_label(
        conf.low = "95% CI", 
        short_flipper = "Number Met (%)"
    ) |>
    cols_align(
        align = "center",
        columns = c("short_flipper", "conf.low")
    )
} 
    

method_1_data |> 
   results_table(methods_table[1, ])

month	n	Number Met (%)	95% CI
Method 1
Monthly sample of 15 cases from All encounters
Jan 2024	15	10 (67%)	[39%, 87%]
Feb 2024	15	10 (67%)	[39%, 87%]
Mar 2024	15	10 (67%)	[39%, 87%]
Apr 2024	15	12 (80%)	[51%, 95%]
May 2024	15	9 (60%)	[33%, 83%]
Jun 2024	15	10 (67%)	[39%, 87%]
Jul 2024	15	10 (67%)	[39%, 87%]
Data source: OSP PBI


method_2_data |> 
    results_table(methods_table[2, ])

month	n	Number Met (%)	95% CI
Method 2
Monthly sample of 30 cases from Primary Care
Jan 2024	30	22 (73%)	[54%, 87%]
Feb 2024	30	20 (67%)	[47%, 82%]
Mar 2024	30	17 (57%)	[38%, 74%]
Apr 2024	30	21 (70%)	[50%, 85%]
May 2024	30	19 (63%)	[44%, 79%]
Jun 2024	30	19 (63%)	[44%, 79%]
Jul 2024	30	18 (60%)	[41%, 77%]
Data source: VA REDCap (manual chart reviews)


method_3_data |> 
    results_table(methods_table[3, ])

month	n	Number Met (%)	95% CI
Method 3
Monthly sample of 50 cases from All encounters
Jan 2024	50	33 (66%)	[51%, 78%]
Feb 2024	50	41 (82%)	[68%, 91%]
Mar 2024	50	29 (58%)	[43%, 72%]
Apr 2024	50	36 (72%)	[57%, 83%]
May 2024	50	31 (62%)	[47%, 75%]
Jun 2024	50	30 (60%)	[45%, 73%]
Jul 2024	50	30 (60%)	[45%, 73%]
Data source: OSP PBI


method_4_data |> 
    results_table(methods_table[4, ])

month	n	Number Met (%)	95% CI
Method 4
Monthly sample of 300 cases from Primary Care
Jan 2024	300	200 (67%)	[61%, 72%]
Feb 2024	300	201 (67%)	[61%, 72%]
Mar 2024	300	199 (66%)	[61%, 72%]
Apr 2024	300	198 (66%)	[60%, 71%]
May 2024	300	199 (66%)	[61%, 72%]
Jun 2024	300	199 (66%)	[61%, 72%]
Jul 2024	300	197 (66%)	[60%, 71%]
Data source: OSP PBI

testing