Sex of Surgeons and Team Composition and Patients’ Length of Stay After Surgery: Evidence from Inpatient Claims Data in China

Author

Shen, Chi

Published

May 16, 2026

1. Preparation about analysis

1.1 Platform Info

print(version)

1.2 Loading packages

library(data.table)
library(stringr)
library(dplyr)
library(ggplot2)
library(lubridate)
library(readr)
library(purrr)
library(fixest)
library(modelsummary)
library(gtsummary)
library(flextable)
library(ggsci)
library(forestplot)
library(gt)
library(ggfixest)
library(flextable)
library(ggstats)
library(cowplot)
library(patchwork)
library(emmeans)
library(ggrepel)

options(scipen = 200)

2. Data manipulate procedure

2.1 Import dataset

#---#
setwd("~/ShenFiles/Research/Sex_Differences_of_Physician")

#---#
xiyi_2023_select <- fread("Datasets/Tidy Data/Xiyi_2023_tidy_2024-08-01_ONLY_SURGERY_CLEAN.csv")

2.2 Data Clean and Sample selection

2.2.1 Drop the records with the count of surgeries less than 10 thousand

surg_code_cat <- xiyi_2023_select[, .N, by = substr(surg_code, 1, 5)]

surg_code_cat_10k <- surg_code_cat[N >= 10000 & substr != "-",]

xiyi_2023_select[, "surg_code_f5" := substr(surg_code, 1, 5)]

xiyi_2023_ana_1 <- 
    xiyi_2023_select[surg_code_f5 %in% surg_code_cat_10k$substr,]

2.2.2 Drop the records with the count of surgeries in one hospital less than 1 thousand

#----#
hosname <- 
    xiyi_2023_select[surg_code_f5 %in% surg_code_cat_10k$substr, .N, by = insti_name]

hosname_1k <- hosname[N >= 1000, ]

#----#
xiyi_2023_ana_2 <- 
    xiyi_2023_ana_1[insti_name %in% hosname_1k$insti_name,]

2.2.3 Drop the records above the 99th percentile of LOS

xiyi_2023_ana_2[, c("los", "losas") := list(
    round(as.numeric(ymd(discharge_date_std) - ymd(admission_date_std)), 1),
    round(as.numeric(ymd(discharge_date_std) - ymd(surg_date_std)), 1)
)]


los_99 <- quantile(xiyi_2023_ana_2$los, 0.99)

xiyi_2023_ana <-
    xiyi_2023_ana_2[los <= los_99, ]

rm(xiyi_2023_ana_1, xiyi_2023_ana_2)

2.2.4 Add the info about hospital’s level and surgery operation type

#----#
# Add the info about hospital's level

xiyi_2023_ana <- left_join(xiyi_2023_ana, 
                           hospital_class,
                           by = "insti_name")


#----#
# Add the info about surgery operation type

xiyi_2023_ana <- left_join(xiyi_2023_ana, 
                           surgery_cat,
                           by = "surg_code_f5")

2.3 Variable derive

2.3.1 Primary outcomes

  • Length of stay: Discharge date - Admitted date

  • Length of stay after surgery: Discharge date - Surgery date

2.3.2 Calculate the Num of surgeon at once admission

xiyi_2023_ana[, "surg_num" := as.numeric(!is.na(surg_date)) +
                              as.numeric(!is.na(surg_date_1)) + 
                              as.numeric(!is.na(surg_date_2)) + 
                              as.numeric(!is.na(surg_date_3)) + 
                              as.numeric(!is.na(surg_date_4)) +
                              as.numeric(!is.na(surg_date_5)) + 
                              as.numeric(!is.na(surg_date_6))]

#----#
# 但是忽略了手术夸凌晨12点的情况

xiyi_2023_ana[, "surg_sameday_yn" := ifelse((rowSums(.SD, na.rm = TRUE) / surg_date) %in% 1:7, 1, 0), 
              .SDcols = c("surg_date", "surg_date_1", "surg_date_2", "surg_date_3",
                          "surg_date_4", "surg_date_5", "surg_date_6") ]

2.3.3 Control variable

  • Month
xiyi_2023_ana[, "month" := substr(year, 5, 6)]
xiyi_2023_ana[, "payment_std_new" := ifelse(payment_std %in% c(2, 3), 2, payment_std)]

2.4 Sample Information

  • Patients
xiyi_2023_ana[, c("age_pat", "sex_pat", "payment_std_new",
                  "admission_way", "categoryEN", "surg_num", 
                  "surg_sameday_yn", "insti_name", "class")] %>%
  tbl_summary(by = sex_pat,
              missing = "ifany",
              missing_text = "Missing Value",
              type = list(surg_num ~ "continuous", surg_sameday_yn ~ "categorical"),
              label = list(age_pat = "Age, yr", 
                           admission_way = "Amission Way", payment_std = "Payment Way"),
              statistic = list(
                         all_continuous() ~ "{mean}({sd})",
                         all_categorical() ~ "{n} ({p}%)"
                      ),
              digits = list(
                         all_continuous() ~ 1,
                         all_categorical() ~ 1
                     )) %>% add_overall() %>% 
              as_flex_table() %>%
              save_as_docx(path = "Outputs/Table-1 Sample description patient.docx")
  • Surgeon
#-------#

staff_selected_id <- unique(xiyi_2023_ana[, "person_id_no"])

staff_database_cln[person_id_no %in% staff_selected_id$person_id_no, ] %>%
  tbl_summary(include = c(age, final_edu_code_new, working_yr, birth_cohort),
              by = sex,
              missing = "ifany",
              missing_text = "Missing Value",
              label = list(age = "Age, yr", 
                           sex = "Sex", 
                           final_edu_code_new = "Education Levels", 
                           birth_cohort = "Birth Cohort"),
              statistic = list(
                         all_continuous() ~ "{mean}({sd})",
                         all_categorical() ~ "{n} ({p}%)"
                      ),
              digits = list(
                         all_continuous() ~ 1,
                         all_categorical() ~ 1
                     )) %>% add_overall() %>% 
              add_p(list(age ~ "t.test", working_yr ~ "t.test"), 
                    test.args = list(age ~ list(var.equal = TRUE),
                                     working_yr ~ list(var.equal = TRUE))) %>%
              as_flex_table() %>%
              save_as_docx(path = "Outputs/Table-2 Sample description surgeon (Add ttest).docx")
  • Surgery
df_fig_1a <-
    xiyi_2023_ana %>%
        mutate(sex_phy_fct = factor(sex_phy, levels = c("1", "2"), 
                                    labels = c("Male", "Female"))) %>%
        group_by(sex_phy_fct, surg_num) %>%
        summarise(N = n())


obs_male <- sum(df_fig_1a[df_fig_1a$sex_phy_fct == "Male", ]$N)
obs_female <- sum(df_fig_1a[df_fig_1a$sex_phy_fct == "Female", ]$N)

df_fig_1a$N_total <- ifelse(df_fig_1a$sex_phy_fct == "Male", 
                            obs_male, obs_female)

df_fig_1a$pct <- df_fig_1a$N * 100  / df_fig_1a$N_total

#------#

fig_1_a <-
    df_fig_1a %>%
        ggplot(aes(x = factor(surg_num), y = pct, fill = sex_phy_fct)) +
            geom_bar(stat = "identity",
                     position = "dodge") +
            geom_text(aes(label = sprintf("%0.1f", pct)),
                      position = position_dodge(1),
                      vjust = -1,
                      size = 3,
                      family = "Times") +
            scale_fill_jama(name = "Surgeon's sex") +
            labs(x = "Number of types of surgery performed in one admission", 
                 y = "Percentage (%)") +
            lims(y = c(0, 60)) +
            theme_bw() +
                theme(text = element_text(family = "Times"), 
                      legend.position = "bottom")
        
#-------------------------#
df_fig_1b <-
    xiyi_2023_ana %>% 
        group_by(sex_phy, surg_code_f5) %>%
        summarise(N = n()) %>%
        mutate(sex_phy_fct = factor(sex_phy, levels = c("1", "2"), 
                                    labels = c("Male", "Female")))

obs_male <- sum(df_fig_1b[df_fig_1b$sex_phy_fct == "Male", ]$N)
obs_female <- sum(df_fig_1b[df_fig_1b$sex_phy_fct == "Female", ]$N)

df_fig_1b$N_total <- ifelse(df_fig_1b$sex_phy_fct == "Male", 
                            obs_male, obs_female)

df_fig_1b$pct <- df_fig_1b$N * 100  / df_fig_1b$N_total

#------#


fig_1_b <- 
    df_fig_1b %>%
        ggplot(., aes(x = ifelse(sex_phy_fct == "Male", -pct, pct),
                      y = surg_code_f5,
                      fill = sex_phy_fct
                      )) + 
          geom_bar(stat = "identity") +
          geom_text(aes(x = ifelse(sex_phy_fct == "Male", -pct - 2, pct + 2), 
                        label = sprintf("%0.1f", pct)), family = "Times") +
          labs(x = "Percentage (%)", 
               y = "ICD Code of Surgery") +
          scale_fill_jama(name = "Surgeon's sex") +
          theme_bw() +
            theme(text = element_text(family = "Times"),
                  axis.text.x = element_blank(),
                  legend.position = "bottom")

pdf("Outputs/Fig-1 Distribution of number of types of surgery performed in one admission & Surgery type.pdf", height = 8, width = 10)
fig_1_a + fig_1_b + plot_annotation(tag_levels = "A") 
dev.off()

3 Results

3.1 Comparative of LOS between male and female surgeons

3.1.1 Distribution of LOS and LOSAS by surgeon’s sex

fig_2_a <-
    xiyi_2023_ana %>%
        mutate(sex_phy_fct = factor(sex_phy, 
                                    levels = c(1, 2), 
                                    labels = c("Male", "Female"))) %>%
        ggplot(aes(x = losas, y = (..count..) * 100/sum(..count..), 
                   fill = factor(sex_phy_fct))) +
            geom_histogram(bins = 30, binwidth = 1,
                           color = "#e9ecef", alpha = 0.6, position = "identity") +
            scale_fill_jama(name = "") +
            labs(y = "Percentage (%)", x = "Length of stay after surgery (Days)") +
            lims(x = c(0, quantile(xiyi_2023_ana$losas, 0.99))) +
            theme_bw() +
            theme(text = element_text(family = "Times", size = 15),
                  legend.position = "bottom")

fig_2_b <-
    xiyi_2023_ana %>%
        mutate(sex_phy_fct = factor(sex_phy, 
                                    levels = c(1, 2), 
                                    labels = c("Male", "Female"))) %>%
        ggplot(aes(x = los,  y = (..count..) * 100/sum(..count..), 
                   fill = factor(sex_phy_fct))) +
            geom_histogram(bins = 30, binwidth = 1,
                           color = "#e9ecef", alpha = 0.6, position = "identity") +
            scale_fill_jama(name = "") +
            labs(y = "Percentage (%)", x = "Length of stay (Days)") +
            theme_bw() +
            theme(text = element_text(family = "Times", size = 15),
                  legend.position = "bottom")

pdf("Outputs/Fig-2 Distribution of LOS and LOSAS by sex of surgeon.pdf", height = 8, width = 10)
fig_2_a + fig_2_b + plot_annotation(tag_levels = "A") 
dev.off()

3.1.2 Multisurgery at once admission by surgeon’s sex

fig_3_a <-
    xiyi_2023_ana %>% 
        group_by(sex_phy, surg_num) %>%
        summarise(mean_losas = mean(losas, na.rm = TRUE)) %>%
        mutate(sex_phy_fct = factor(sex_phy, levels = c("1", "2"), 
                                    labels = c("Male", "Female"))) %>%
        ggplot(., aes(x = factor(surg_num),
                      y = mean_losas,
                      fill = sex_phy_fct
                      )) + 
          geom_bar(stat = "identity", alpha = 0.6, position = "dodge") +
          geom_text(aes(label = sprintf("%0.1f", mean_losas)), 
                    position = position_dodge(1),
                    vjust = -1,
                    family = "Times") +
          labs(x = "No. of surgeries at once admission", 
               y = "Length of stay after surgery (Day)") +
          scale_fill_lancet(name = "Surgeon's sex") +
          theme_bw() +
            theme(text = element_text(family = "Times"),
                  legend.position = "bottom")

3.1.3 Surgery type

  • Distribution of Top 20 surgery in male and female surgeon
#---------#

fig_3_b <- 
    xiyi_2023_ana %>% 
        group_by(sex_phy, surg_code_f5) %>%
        summarise(mean_losas = mean(losas, na.rm = TRUE)) %>%
        mutate(sex_phy_fct = factor(sex_phy, levels = c("1", "2"), 
                                    labels = c("Male", "Female"))) %>%
        ggplot(., aes(x = mean_losas,
                      y = surg_code_f5,
                      fill = sex_phy_fct
                      )) + 
          geom_bar(stat = "identity", alpha = 0.6, position = "dodge") +
          geom_text(aes(label = sprintf("%0.1f", mean_losas)), 
                    position = position_dodge(1),
                    hjust = -1,
                    family = "Times") +
          labs(x = "Length of stay after surgery (Day)", 
               y = "ICD Code of surgery") +
          scale_fill_lancet(name = "Surgeon's sex") +
          scale_x_continuous(breaks = c(0, 2, 4, 6, 8, 10, 12, 14), 
                             limits = c(0, 14)) +
          theme_bw() +
            theme(text = element_text(family = "Times"),
                  legend.position = "bottom")

pdf("Outputs/Fig-3 Distribution of LOSAS by surgery type and times.pdf", height = 8, width = 10)
fig_3_a + fig_3_b + plot_annotation(tag_levels = "A") 
dev.off()

3.1.4 Specific surgeries

  • Distribution figure
fig_6 <-
    xiyi_2023_ana %>% 
        group_by(sex_phy, surg_num, categoryEN) %>%
        summarise(mean_losas = mean(losas, na.rm = TRUE)) %>%
        mutate(sex_phy_fct = factor(sex_phy, levels = c("1", "2"), 
                                    labels = c("Male", "Female"))) %>%
        ggplot(., aes(x = factor(surg_num),
                      y = mean_losas,
                      fill = sex_phy_fct
                      )) + 
          geom_bar(stat = "identity", alpha = 0.6, position = "dodge") +
          geom_text(aes(label = sprintf("%0.1f", mean_losas)), 
                    position = position_dodge(1),
                    vjust = -1,
                    family = "Times") +
          facet_wrap(~ categoryEN) +
          labs(x = "No. of surgeries at once admission", 
               y = "Length of stay after surgery (Day)") +
          scale_fill_lancet(name = "Surgeon's sex") +
          ylim(c(0, 15)) +
          theme_bw() +
            theme(text = element_text(family = "Times"),
                  legend.position = "bottom")

pdf("Outputs/Fig-6 Distribution of LOSAS by times facet with surgery category.pdf", height = 8, width = 10)
fig_6
dev.off()

3.2 Main regression and Robust analysis

3.2.1 Define the function

losas_reg <- function(df) {

    data <- df

    fit_poi <-
        feglm(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    fit_negb <-
        fenegbin(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
    #---Post-hoc---#
    posthoc_df <- emm_poi <- emmeans(fit_poi, 
                   specs = ~ factor(sex_phy),
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()

    return(list("Poisson" = fit_poi, 
                "Negative Binomial" = fit_negb,
                posthoc_df))
    
}

3.2.2 Running the Main regressions

#----Full Sample----#

fit_full <- 
    xiyi_2023_ana %>% 
    losas_reg(.)

#----Surgery in Top 10----#

surg_code_10_vct <-
    xiyi_2023_ana %>% 
    group_by(surg_code_f5) %>%
    summarise(N = n()) %>%
    filter(rank(-N, ties.method = "first") <= 10)


fit_top10 <- 
    xiyi_2023_ana %>% 
    filter(surg_code_f5 %in% surg_code_10_vct$surg_code_f5) %>%
    losas_reg(.)

#----Remove Maternal and Child Health Hospital----#

fit_remove_preg <- 
    xiyi_2023_ana %>% 
    filter(!str_detect(insti_name, "妇幼")) %>%
    losas_reg(.)


#----Remove None Overlap surgeries----#

fit_overlap <- 
    xiyi_2023_ana %>% 
    filter(!(surg_code_f5 %in% c("88.55", "88.41", "85.21", "75.69", "74.1x", "73.6x",
                                 "73.59", "69.09", "68.29", "51.23", "47.01"))) %>%
    losas_reg(.)



#----#

modelsummary(
  list("Full Sample" = fit_full[[1]], 
       "Top 10 of surgical operation" = fit_top10[[1]],
       "Excluding Maternal and Child Health Hospitals" = fit_remove_preg[[1]],
       "Selected surgical operation" = fit_overlap[[1]]),
  coef_rename = c("factor(sex_phy)2" = "Surgeon's sex (Ref: Male)", 
                  "age_phy" = "Surgeon's Age"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_omit = "Intercept",
  out = "Outputs/Table-3-1 Main estimation (Hos-month fix)-Poisson.docx")

#----#

modelsummary(
  list("Full Sample" = fit_full[[2]], 
       "Top 10 of surgical operation" = fit_top10[[2]],
       "Excluding Maternal and Child Health Hospitals" = fit_remove_preg[[2]],
       "Selected surgical operation" = fit_overlap[[2]]),
  coef_rename = c("factor(sex_phy)2" = "Surgeon's sex (Ref: Male)", 
                  "age_phy" = "Surgeon's Age"),
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_omit = "Intercept",
  out = "Outputs/Table-3-2 Main estimation (Hos-month fix)-Negative Binomial.docx")

3.2.3 Present in Figures

modelplot_text <- function(modelist, map, title = "") {
  
  plot_data <- 
    modelplot(
      modelist, 
      coef_map = map,
      draw = FALSE
      ) 

  ggplot(plot_data, aes(x = estimate, y = term, color = model)) +
    geom_linerange(aes(xmin = conf.low, xmax = conf.high), 
                   linewidth = 1,
                   position = position_dodge2(0.3)) +
    geom_point(size = 3, 
               color = "darkblue",
               position = position_dodge2(0.3)) +
    geom_text_repel(
      aes(label = paste0(sprintf("%0.3f", estimate), 
                        " [", 
                        sprintf("%0.4f", conf.low), " to ", 
                        sprintf("%0.4f", conf.high), "]")), 
      position = position_dodge2(0.3),
      size = 5,
      color = "black",
      seed = 1234,
      family = "Times") +
    geom_vline(xintercept = 0, 
              linetype = "dashed", 
              color = "black", 
              linewidth = 0.8) +
    labs(title = title,
         y = "",
         x = "Coefficient Estimates of LOSAS and 95% Confidence Intervals",
         color = "") +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
          legend.position = "bottom",
          text = element_text(family = "Times", size = 15)) +
    guides(color=guide_legend(nrow = 2, byrow = TRUE))
  
}



fig_fit_1 <- 
  modelplot_text(
    list("A: Full Sample" = fit_full[[1]], 
         "B: Top 10 of surgical operation" = fit_top10[[1]],
         "C: Excluding Maternal and Child Health Hospitals" = fit_remove_preg[[1]],
         "D: Selected surgical operation" = fit_overlap[[1]]
          ), 
    map = c("factor(sex_phy)2" = "Female Surgeon"),
    title = "Outpatient and Emergency Department Admission (Poisson)"
    )



pdf("Outputs/Fig-4 Main estimation (Figure of Table 3-1 to 3-3).pdf", height = 8, width = 12)

fig_fit_1

dev.off()

3.2.4 Post-hoc of main regression

posthoc_df_full <- fit_full[[3]]

posthoc_df_full$sample <- "A: Full Sample"

#------#

posthoc_df_top10 <- fit_top10[[3]]

posthoc_df_top10$sample <- "B: Top 10 of surgical operation"

#------#

posthoc_df_remove_preg <- fit_remove_preg[[3]]

posthoc_df_remove_preg$sample <- "C: Excluding Maternal and Child Health Hospitals"

#------#

posthoc_df_overlap <- fit_overlap[[3]]

posthoc_df_overlap$sample <- "D: Selected surgical operation"

#------#



posthoc_df <- rbind(posthoc_df_full,
                    posthoc_df_top10,
                    posthoc_df_remove_preg,
                    posthoc_df_overlap)

posthoc_df$sex_phy <- factor(posthoc_df$sex_phy,
                             levels = c(1, 2),
                             labels = c("Male", "Female"))

flextable(posthoc_df) %>% 
  save_as_docx(path = "Outputs/Table Post-hoc of main regression.docx")

3.3 Heterogeneity analysis

3.3.1 Emergency status

  • The ED source was selected to reflect the emergency status.
losas_reg_heter <- function(df) {

    data <- df

    fit_poi <-
        feglm(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    fit_negb <-
        fenegbin(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
        #---Post-hoc---#
    posthoc_df <- emm_poi <- emmeans(fit_poi, 
                   specs = ~ factor(sex_phy),
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()

    return(list("Poisson" = fit_poi, 
                "Negative Binomial" = fit_negb,
                posthoc_df))
    
}



#----Full Sample----#

fit_full_ed <- 
    xiyi_2023_ana %>% 
    filter(admission_way == 1) %>%
    losas_reg_heter(.)

#----Surgery in Top 10----#

surg_code_10_vct <-
    xiyi_2023_ana %>% 
    group_by(surg_code_f5) %>%
    summarise(N = n()) %>%
    filter(rank(-N, ties.method = "first") <= 10)


fit_top10_ed <- 
    xiyi_2023_ana %>% 
    filter(admission_way == 1) %>%
    filter(surg_code_f5 %in% surg_code_10_vct$surg_code_f5) %>%
    losas_reg_heter(.)

#----Remove Maternal and Child Health Hospital----#

fit_remove_preg_ed <- 
    xiyi_2023_ana %>% 
    filter(admission_way == 1) %>%
    filter(!str_detect(insti_name, "妇幼")) %>%
    losas_reg_heter(.)


#----Remove None Overlap surgeries----#

fit_overlap_ed <- 
    xiyi_2023_ana %>% 
    filter(admission_way == 1) %>%
    filter(!(surg_code_f5 %in% c("88.55", "88.41", "85.21", "75.69", "74.1x", "73.6x",
                                 "73.59", "69.09", "68.29", "51.23", "47.01"))) %>%
    losas_reg_heter(.)


#-----# 

fig_fit_1_ed <- 
  modelplot_text(
    list("A: Full Sample" = fit_full_ed[[1]], 
         "B: Top 10 of surgical operation" = fit_top10_ed[[1]],
         "C: Excluding Maternal and Child Health Hospitals" = fit_remove_preg_ed[[1]],
         "D: Selected surgical operation" = fit_overlap_ed[[1]]
          ), 
    map = c("factor(sex_phy)2" = "Female Surgeon"),
    title = "Only Emergency Department Admission (Poisson)"
    )



#-----# 

pdf("Outputs/Fig-5 Heterogeneity analysis-emergency status.pdf", height = 8, width = 12)

fig_fit_1_ed

dev.off()

3.3.1.1 Negative-Binomial in Appendix

fig_fit_1_ed_negb <- 
  modelplot_text(
    list("A: Full Sample" = fit_full_ed[[2]], 
         "B: Top 10 of surgical operation" = fit_top10_ed[[2]],
         "C: Excluding Maternal and Child Health Hospitals" = fit_remove_preg_ed[[2]],
         "D: Selected surgical operation" = fit_overlap_ed[[2]]
          ), 
    map = c("factor(sex_phy)2" = "Female Surgeon"),
    title = "Only Emergency Department Admission (Negative Binomial)"
    )


#-----# 

pdf("Outputs/Fig-5-1 Heterogeneity analysis-emergency status (Negative Binomial).pdf", height = 8, width = 12)

fig_fit_1_ed_negb

dev.off()

3.3.1.2 Post-hoc results

posthoc_df_full_ed <- fit_full_ed[[3]]

posthoc_df_full_ed$sample <- "A: Full Sample"

#------#

posthoc_df_top10_ed <- fit_top10_ed[[3]]

posthoc_df_top10_ed$sample <- "B: Top 10 of surgical operation"

#------#

posthoc_df_remove_preg_ed <- fit_remove_preg_ed[[3]]

posthoc_df_remove_preg_ed$sample <- "C: Excluding Maternal and Child Health Hospitals"

#------#

posthoc_df_overlap_ed <- fit_overlap_ed[[3]]

posthoc_df_overlap_ed$sample <- "D: Selected surgical operation"

#------#


posthoc_df_ed <- rbind(posthoc_df_full_ed,
                       posthoc_df_top10_ed,
                       posthoc_df_remove_preg_ed,
                       posthoc_df_overlap_ed)


posthoc_df_ed$sex_phy <- factor(posthoc_df_ed$sex_phy,
                                levels = c(1, 2),
                                labels = c("Male", "Female"))

flextable(posthoc_df_ed) %>% 
  save_as_docx(path = "Outputs/Table Post-hoc of ED.docx")

3.3.2 Surgical complexity or elective

  • The number of surgical site at one surgery was selected to reflect the complexity.
losas_reg_complex <- function(df) {

    data <- df

    fit_poi <-
        feglm(losas ~ factor(sex_phy) + factor(payment_std_new) + age_phy +
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    fit_negb <-
        fenegbin(losas ~ factor(sex_phy) + factor(payment_std_new) + age_phy +
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
            #---Post-hoc---#
    posthoc_df <- emm_poi <- emmeans(fit_poi, 
                   specs = ~ factor(sex_phy),
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()

    return(list("Poisson" = fit_poi, 
                "Negative Binomial" = fit_negb,
                posthoc_df))
    
}

#--------#


fit_full_clx_1 <- 
    xiyi_2023_ana %>% 
    filter(surg_sameday_yn == 1 & surg_num == 1) %>%
    losas_reg_complex(.)

fit_full_clx_2 <- 
    xiyi_2023_ana %>% 
    filter(surg_sameday_yn == 1 & surg_num == 2) %>%
    losas_reg_complex(.)

fit_full_clx_3 <- 
    xiyi_2023_ana %>% 
    filter(surg_sameday_yn == 1 & surg_num == 3) %>%
    losas_reg_complex(.)

fit_full_clx_4 <- 
    xiyi_2023_ana %>% 
    filter(surg_sameday_yn == 1 & surg_num >= 4) %>%
    losas_reg_complex(.)

#-----# 


fig_fit_1_clx <- 
  modelplot_text(
    list("A: 1 types of surgeries in one operation" = fit_full_clx_1[[1]], 
         "B: 2 types of surgeries in one operation" = fit_full_clx_2[[1]],
         "C: 3 types of surgeries in one operation" = fit_full_clx_3[[1]],
         "D: ≥ 4 types of surgeries in one operation" = fit_full_clx_4[[1]]
          ), 
    map = c("factor(sex_phy)2" = "Female Surgeon"),
    title = "Full Sample (Poisson)"
    )



#---------#

pdf("Outputs/Fig-6 Heterogeneity analysis-complexity.pdf", height = 8, width = 12)

fig_fit_1_clx

dev.off()

3.3.2.1 Negative-Binomial in Appendix

fig_fit_1_clx_negb <- 
  modelplot_text(
    list("A: 1 types of surgeries in one operation" = fit_full_clx_1[[2]], 
         "B: 2 types of surgeries in one operation" = fit_full_clx_2[[2]],
         "C: 3 types of surgeries in one operation" = fit_full_clx_3[[2]],
         "D: ≥ 4 types of surgeries in one operation" = fit_full_clx_4[[2]]
          ), 
    map = c("factor(sex_phy)2" = "Female Surgeon"),
    title = "Full Sample (Negative Binomial)"
    )


#---------#

pdf("Outputs/Fig-6-1 Heterogeneity analysis-complexity (Negative Binomial).pdf", height = 8, width = 12)

fig_fit_1_clx_negb

dev.off()

3.3.2.2 Post-hoc results

posthoc_df_clx_1 <- fit_full_clx_1[[3]]

posthoc_df_clx_1$sample <- "A: 1 types of surgeries in one operation"

#------#

posthoc_df_clx_2 <- fit_full_clx_2[[3]]

posthoc_df_clx_2$sample <- "B: 2 types of surgeries in one operation"

#------#

posthoc_df_clx_3 <- fit_full_clx_3[[3]]

posthoc_df_clx_3$sample <- "C: 3 types of surgeries in one operation"

#------#

posthoc_df_clx_4 <- fit_full_clx_4[[3]]

posthoc_df_clx_4$sample <- "D: ≥ 4 types of surgeries in one operation"

#------#



posthoc_df_clx <- rbind(posthoc_df_clx_1,
                        posthoc_df_clx_2,
                        posthoc_df_clx_3,
                        posthoc_df_clx_4)


posthoc_df_clx$sex_phy <- factor(posthoc_df_clx$sex_phy,
                                levels = c(1, 2),
                                labels = c("Male", "Female"))

flextable(posthoc_df_clx) %>% 
  save_as_docx(path = "Outputs/Table Post-hoc of complex.docx")

3.3.3 Highest educated surgeons

losas_reg_edu <- function(df) {

    data <- df

    fit_poi <-
        feglm(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    fit_negb <-
        fenegbin(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)

    return(list("Poisson" = fit_poi, 
                "Negative Binomial" = fit_negb))
    
}


#----Full Sample----#

fit_full_edu <- 
    xiyi_2023_ana %>% 
    filter(final_edu_code_new == 1) %>%
    losas_reg_edu(.)

#----Surgery in Top 10----#

surg_code_10_vct <-
    xiyi_2023_ana %>% 
    group_by(surg_code_f5) %>%
    summarise(N = n()) %>%
    filter(rank(-N, ties.method = "first") <= 10)


fit_top10_edu <- 
    xiyi_2023_ana %>% 
    filter(final_edu_code_new == 1) %>%
    filter(surg_code_f5 %in% surg_code_10_vct$surg_code_f5) %>%
    losas_reg_edu(.)

#----Remove Maternal and Child Health Hospital----#

fit_remove_preg_edu <- 
    xiyi_2023_ana %>% 
    filter(final_edu_code_new == 1) %>%
    filter(!str_detect(insti_name, "妇幼")) %>%
    losas_reg_edu(.)


#----Remove None Overlap surgeries----#

fit_overlap_edu <- 
    xiyi_2023_ana %>% 
    filter(final_edu_code_new == 1) %>%
    filter(!(surg_code_f5 %in% c("88.55", "88.41", "85.21", "75.69", "74.1x", "73.6x",
                                 "73.59", "69.09", "68.29", "51.23", "47.01"))) %>%
    losas_reg_edu(.)


#-----# 

fig_fit_1_edu <-
  modelplot_text(fit_full_edu, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "A: Full Sample \n (Postgraduate Surgeon)")

fig_fit_2_edu <-
  modelplot_text(fit_top10_edu, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "B: Top 10 of surgical operation \n (Postgraduate Surgeon)")

fig_fit_3_edu <-
  modelplot_text(fit_remove_preg_edu, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "C: Excluding Maternal and Child Health Hospitals \n (Postgraduate Surgeon)")

fig_fit_4_edu <-
  modelplot_text(fit_overlap_edu, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "D: Selected surgical operation \n (Postgraduate Surgeon)")



#-----# 

pdf("Outputs/Fig-7 Heterogeneity analysis-Postgraduate.pdf", height = 10, width = 16)

(fig_fit_1_edu + fig_fit_2_edu) / (fig_fit_3_edu + fig_fit_4_edu)

dev.off()

3.3.4 High experience surgeons

losas_reg_year <- function(df) {

    data <- df

    fit_poi <-
        feglm(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    fit_negb <-
        fenegbin(losas ~ factor(sex_phy) + surg_num + age_phy +
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                factor(final_edu_code_new) + insti_name + 
                insti_name^month + surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)

    return(list("Poisson" = fit_poi, 
                "Negative Binomial" = fit_negb))
    
}



#----Full Sample----#

fit_full_year <- 
    xiyi_2023_ana %>% 
    filter(working_yr >= 20) %>%
    losas_reg_year(.)

#----Surgery in Top 10----#

surg_code_10_vct <-
    xiyi_2023_ana %>% 
    group_by(surg_code_f5) %>%
    summarise(N = n()) %>%
    filter(rank(-N, ties.method = "first") <= 10)


fit_top10_year <- 
    xiyi_2023_ana %>% 
    filter(working_yr >= 20) %>%
    filter(surg_code_f5 %in% surg_code_10_vct$surg_code_f5) %>%
    losas_reg_year(.)

#----Remove Maternal and Child Health Hospital----#

fit_remove_preg_year <- 
    xiyi_2023_ana %>% 
    filter(working_yr >= 20) %>%
    filter(!str_detect(insti_name, "妇幼")) %>%
    losas_reg_year(.)


#----Remove None Overlap surgeries----#

fit_overlap_year <- 
    xiyi_2023_ana %>% 
    filter(working_yr >= 20) %>%
    filter(!(surg_code_f5 %in% c("88.55", "88.41", "85.21", "75.69", "74.1x", "73.6x",
                                 "73.59", "69.09", "68.29", "51.23", "47.01"))) %>%
    losas_reg_year(.)


#-----# 

fig_fit_1_year <-
  modelplot_text(fit_full_year, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "A: Full Sample \n (≥ 20 Years of Working Experience)")

fig_fit_2_year <-
  modelplot_text(fit_top10_year, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "B: Top 10 of surgical operation \n (≥ 20 Years of Working Experience)")

fig_fit_3_year <-
  modelplot_text(fit_remove_preg_year, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "C: Excluding Maternal and Child Health Hospitals \n (≥ 20 Years of Working Experience)")

fig_fit_4_year <-
  modelplot_text(fit_overlap_year, 
                 map = c("factor(sex_phy)2" = "Female Surgeon"),
                 title = "D: Selected surgical operation \n (≥ 20 Years of Working Experience)")



#-----# 

pdf("Outputs/Fig-8 Heterogeneity analysis-20 years working experience.pdf", height = 10, width = 16)

(fig_fit_1_year + fig_fit_2_year) / (fig_fit_3_year + fig_fit_4_year)

dev.off()

3.4 Sex interaction between patient and surgeon

3.4.1 Define the function

losas_reg_patsurg <- function(df) {

    data <- df
    data$sex_phy_minus1 <- data$sex_phy - 1
    data$sex_pat_minus1 <- data$sex_pat - 1
    
    
    fit_logols <-
        feols(losas ~ sex_phy_minus1 * sex_pat_minus1  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat + age_phy | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
    
    fit_poi <-
        feglm(losas ~ sex_phy_minus1 * sex_pat_minus1  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat + age_phy | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    #------#
    emm_logols <- emmeans(fit_logols, 
                   specs = ~ sex_phy_minus1 * sex_pat_minus1,
                   infer = TRUE,
                   data = data) %>% as_tibble()
    
     #------#
     emm_poi <- emmeans(fit_poi, 
                   specs = ~ sex_phy_minus1 * sex_pat_minus1,
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()
    
    return(list(emm_logols, emm_poi))
    
}

3.4.2 Run the regressions

  • Poisson
fit_full_intact_pat <- 
    xiyi_2023_ana %>% 
    losas_reg_patsurg(.)

df_inter_pat_2 <-
  fit_full_intact_pat[[2]] %>%
    mutate(sex_phy = factor(sex_phy_minus1, 
                            levels = c(0, 1), labels = c("Male", "Female")),
           sex_pat = factor(sex_pat_minus1, 
                            levels = c(0, 1), labels = c("Male", "Female"))) 

3.5 Sex interaction between surgeon and assistant

3.5.1 Define the function

losas_reg_aidsurg <- function(df) {

    data <- df
    data$sex_phy_minus1 <- data$sex_phy - 1
    data$sex_aid_minus1 <- data$sex_aid - 1
    
    
    fit_logols <-
        feols(losas ~ sex_phy_minus1 * sex_aid_minus1  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat + age_phy | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
    
    fit_poi <-
        feglm(losas ~ sex_phy_minus1 * sex_aid_minus1  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat + age_phy | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    #------#
    emm_logols <- emmeans(fit_logols, 
                   specs = ~ sex_phy_minus1 * sex_aid_minus1,
                   infer = TRUE,
                   data = data) %>% as_tibble()
    
     #------#
     emm_poi <- emmeans(fit_poi, 
                   specs = ~ sex_phy_minus1 * sex_aid_minus1,
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()
    
    return(list(emm_logols, emm_poi))
    
}

3.5.2 Run the regressions

  • Poisson
xiyi_2023_aid <- 
    filter(xiyi_2023_ana,
           surg_aide_I != "" & str_detect(surg_aide_I, "[A-Za-z0-9_-]+") == FALSE
          )


xiyi_2023_aid_fin <-
    inner_join(xiyi_2023_aid,
               staff_database_cln[, c("insti_name_new", "name", "sex", "age")],
               by = c("insti_name" = "insti_name_new", "surg_aide_I" = "name")
              )

xiyi_2023_aid_fin <-
    xiyi_2023_aid_fin %>%
    rename("sex_aid" = "sex", "age_aid" = "age")



#----Full Sample----#

fit_full_intact_aid <- 
    xiyi_2023_aid_fin %>% 
    losas_reg_aidsurg(.)

#----------#

df_inter_aid_2 <-
  fit_full_intact_aid[[2]] %>%
    mutate(sex_phy = factor(sex_phy_minus1, 
                            levels = c(0, 1), labels = c("Male", "Female")),
           sex_aid = factor(sex_aid_minus1, 
                            levels = c(0, 1), labels = c("Male", "Female"))) 

3.6 Interaction between surgeon’s sex and birth cohort

3.6.1 Define the function

losas_reg_cohsurg <- function(df) {

    data <- df
    data$sex_phy_minus1 <- data$sex_phy - 1
    
    fit_logols <-
        feols(losas ~ sex_phy_minus1 * birth_cohort  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              data = data)
    
    
    fit_poi <-
        feglm(losas ~ sex_phy_minus1 * birth_cohort  + surg_num + 
                      surg_sameday_yn + factor(payment_std_new) + 
                      factor(admission_way) + age_pat | 
                      factor(final_edu_code_new) + insti_name^month + 
                      surg_code_f5 + categoryEN,
              cluster = ~ insti_name^month,
              family = "poisson",
              data = data)

    #------#
    emm_logols <- emmeans(fit_logols, 
                   specs = ~ sex_phy_minus1 * birth_cohort,
                   infer = TRUE,
                   data = data) %>% as_tibble()
    
     #------#
     emm_poi <- emmeans(fit_poi, 
                   specs = ~ sex_phy_minus1 * birth_cohort,
                   type = "response",
                   link = "log",
                   data = data) %>% as_tibble()
    
    return(list(emm_logols, emm_poi))
    
}

3.6.2 Run the regressions

  • Poisson
fit_full_intact_cohort <- 
    xiyi_2023_ana %>% 
    losas_reg_cohsurg(.)

#------#

fig_inter_coh_2 <-
  fit_full_intact_cohort[[2]] %>%
    mutate(sex_phy = factor(sex_phy_minus1, 
                            levels = c(0, 1), labels = c("Male", "Female"))) %>%
    ggplot(., aes(x = birth_cohort, 
                  y = rate, 
                  color = sex_phy, 
                  group = sex_phy)) +
      geom_point(size = 3) +                
      geom_line(linetype = "solid") +       
      geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.02) + 
      geom_text_repel(
        aes(label = paste0(sprintf("%0.2f", rate), 
                          " [", 
                          sprintf("%0.2f", asymp.LCL), " to ", 
                          sprintf("%0.2f", asymp.UCL), "]"),
            color = sex_phy), 
        size = 5,
        seed = 1234,
        family = "Times") +
      labs(
        x = "Birth Cohort of Surgeon",
        y = "Predicted LOSAS (Days)",
        color = "Surgeon's Sex ",
        title = "B: Birth Cohort of Surgeon (Poisson Estimate)"
      ) +
      scale_color_brewer(palette = "Set1") + 
      theme_bw() +                     
      theme(plot.title = element_text(hjust = 0.5),
            legend.position = "bottom",
            text = element_text(size = 15, family = "Times"))

3.7 Output all the interaction plots

df_inter_pat_2_sub <-
  df_inter_pat_2 %>%
  select(rate, asymp.LCL, asymp.UCL, sex_phy, sex_pat) %>%
  mutate(categ = paste(sex_pat, "Patient"))


df_inter_aid_2_sub <-
  df_inter_aid_2 %>%
  select(rate, asymp.LCL, asymp.UCL, sex_phy, sex_aid) %>%
  mutate(categ = paste(sex_aid, "Assistant"))


df_inter_sub <- rbind(df_inter_pat_2_sub[, -5],
                      df_inter_aid_2_sub[, -5])

#------#

fig_inter_1 <-
  df_inter_sub %>%
      mutate(categ_fct = factor(categ, 
                                levels = c("Female Assistant",
                                           "Male Assistant",
                                           "Female Patient",
                                           "Male Patient"))) %>%
      ggplot(., aes(x = categ_fct, 
                    y = rate, 
                    color = sex_phy, 
                    group = sex_phy)) +
        geom_point(size = 3, position = position_dodge(width = 0.5)) +                 
        geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), 
                      width = 0.05,
                      position = position_dodge(width = 0.5)) + 
        geom_text_repel(
          aes(label = paste0(round(rate, 2), 
                            " [", 
                            round(asymp.LCL, 2), " to ", 
                            round(asymp.UCL, 2), "]"), 
              color = sex_phy), 
          size = 5,
          seed = 1234,
          family = "Times") +
        labs(
          x = "",
          y = "Predicted LOSAS (Days) and 95% Confidence Intervals",
          color = "Surgeon's Sex ",
          title = "A: Patient' Sex and Assistant's Sex (Poisson Estimate)"
        ) +
        scale_color_brewer(palette = "Set1") + 
        theme_bw() +                     
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "bottom",
              text = element_text(size = 15, family = "Times"))


pdf("Outputs/Fig-9 Interaction Post hoc.pdf", height = 14, width = 12)

fig_inter_1 / fig_inter_coh_2
  
dev.off()

The END