knitr::opts_chunk$set(warning=FALSE, message=FALSE, out.width = '100%')

#Cancer LCDI Project syntax example 2----

#Libraries ----

library(dplyr)
library(survival)
library(survminer)
library(forcats)
library(readxl)
library(haven)
library(tidyverse)
library(lubridate)
library(data.table)
library(tidyr)
library(stringr)
library(readr)
library(glue)
library(collapsibleTree)
library(ggplot2)
library(ggalluvial)
library(writexl)
library(networkD3)
library(rmarkdown)
library(DT)
library(knitr)
library(kableExtra)
library(plotly)

#rounding function

round2 <- function(x, n) {
  posneg = sign(x)
  z = abs(x)*10^n
  z = z + 0.5
  z = trunc(z)
  z = z/10^n
  z * posneg
}


 
#SMR06 ----

#For all columns use this: "Z:/Oncology Data/Cancer LCDI/SMR06/SMR06 col names pos.csv"
#This is a table derived from the NHS Boards file layouts file that comes from the SMR linked file that NHS Lothian downloads from ISD
#Please note that not all variables are read into R

SMR06headers<-read_csv("Z:/Oncology Data/Cancer LCDI/SMR06/SMR06 Kept cols.csv")
PosFrom <- SMR06headers$Position_From
PosTo <- SMR06headers$Position_To
SMR06_header<-SMR06headers$Field_name


#REMEMBER TO SPECIFY GUESS_MAX to avoid warnings!.


read_fwf("Z:/Oncology Data/Cancer LCDI/SMR06/S_mrl_smr06.dat", fwf_positions(PosFrom,PosTo,
                                                                                  col_names = SMR06_header),guess_max = 200000) %>% 
  select(-c(SURNAME,SURNAME_SNDEX_CODE,PREVIOUS_SNDEX_CODE,PREVIOUS_SURNAME,FIRST_FORENAME)) -> SMR06

#Changing dates to date format


SMR06$INCIDENCE_DATE <- SMR06$INCIDENCE_DATE %>% ymd() 
SMR06$DATE_OF_BIRTH <- SMR06$DATE_OF_BIRTH %>% ymd()
SMR06$DATE_OF_DEATH <- SMR06$DATE_OF_DEATH %>% ymd() 
SMR06$DATE_1ST_CHEMO <-SMR06$DATE_1ST_CHEMO %>% ymd() 
SMR06$DATE_1ST_HORM <- SMR06$DATE_1ST_HORM %>% ymd() 
SMR06$DATE_1ST_OTHER_THERAPY <-SMR06$DATE_1ST_OTHER_THERAPY %>% ymd() 
SMR06$DATE_1ST_RAD <-SMR06$DATE_1ST_RAD %>% ymd() 
SMR06$DATE_1ST_SURGERY <-SMR06$DATE_1ST_SURGERY %>% ymd()

SMR06$INCIDENCE_YEAR <-year(SMR06$INCIDENCE_DATE)
SMR06$INCIDENCE_YEAR <-as.numeric(SMR06$INCIDENCE_YEAR)



#HB code lookups

HB_LOOKUP<- read_sav("N:/Library/Lookups/Geographic/HB_Lookup.sav")



#Selecting patient CHI numbers who have a breast cancer diagnosis, this variable can be used to identify patients 
# who have additional tumours as well as early breast cancer
# Filtering to show only incidence dates from 01/01/2015 to 31/12/2016

#Cancer codes for early stage breast cancer

cancerCodes <- c("C50","C500","C501","C502","C503","C504","C505","C506",
                 "C508","C509","C5091")


#Check to see if first surgery occured within 9 months of being diagnosed

difftime(SMR06$DATE_1ST_SURGERY,SMR06$INCIDENCE_DATE,units=c("days"))->SMR06$time_from_diag_to_surg

#Flagging any surgeries which occur within 9 months of incidence date 
ifelse(SMR06$time_from_diag_to_surg<=273.75,1,0)->SMR06$Surg_within_9months


SMR06 %>% filter(ICD10S_CANCER_SITE %in% cancerCodes & INCIDENCE_YEAR>=2015 & INCIDENCE_YEAR<=2016 & HERCEPTIN_RECEPTOR_STATUS==1 & SURGERY==1 & Surg_within_9months==1) %>% select(UPI_NUMBER,DATE_OF_BIRTH,SEX,ETHNIC_GROUP,INCIDENCE_DATE,ICD10S_CANCER_SITE,OESTROGEN_RECEPTOR_STATUS,PROGESTERONE_RECEPTOR_STATUS,HERCEPTIN_RECEPTOR_STATUS,STAGE_CLINICAL_T,STAGE_CLINICAL_N,STAGE_CLINICAL_M,STAGE_PATHOLOGIC_T,STAGE_PATHOLOGIC_N,STAGE_PATHOLOGIC_M,SURGERY,DATE_1ST_SURGERY,HBRES_CURRENT,DATE_1ST_SURGERY,DATE_1ST_CHEMO) -> SMR06_BC_201516


#Creating a flag for metastasis 
  ifelse(SMR06_BC_201516$STAGE_CLINICAL_M==1 | SMR06_BC_201516$STAGE_PATHOLOGIC_M==1,1,0)->SMR06_BC_201516$M_flag
  


## Reading in advanced breast cancer dataset

read_csv("Z:/Oncology Data/Cancer LCDI/Secondary Breast Cancer CNS database/Second attempt/Adv_BC_pats_CHI v2.csv") %>% select(-c("NAME")) ->Sec_BC_pats

Sec_BC_pats$UPI_NUMBER %>% as.character() ->Sec_BC_pats$UPI_NUMBER

Sec_BC_pats %>% filter(UPI_NUMBER %in% SMR06_BC_201516$UPI_NUMBER) -> Sec_BC_pats

#Adding Metastatic incidence date to the SMR06 data 

Sec_BC_pats %>% select(UPI_NUMBER,Met_inc_date) %>% full_join(SMR06_BC_201516)->SMR06_BC_201516

as.Date(SMR06_BC_201516$Met_inc_date, format="%d/%m/%Y") -> SMR06_BC_201516$Met_inc_date

#Creating a flag to check if the patient has developed metastatic disease before having surgery

ifelse(SMR06_BC_201516$Met_inc_date<SMR06_BC_201516$DATE_1ST_SURGERY,1,0)->SMR06_BC_201516$Mets_before_surgery


#All patients with M stage 1 appear to have presented as metastatic initally and have no other smr06 records where M stage is 0
#Therefore it is safe to remove the records where stage M = 1 as these patients don't have any other records of early breast cancer

#filter out all M stage 1 patients 

SMR06_BC_201516 %>% filter(M_flag==0|is.na(M_flag)==1)->SMR06_BC_201516


#Matching Health Board descriptions


SMR06_BC_201516$HBRES_CURRENT_NAME <- with(HB_LOOKUP,HB_Name[match(SMR06_BC_201516$HBRES_CURRENT,
                                                                  HB2014)])



##Reading in Chemocare data

read_xlsx("Z:/Oncology Data/Cancer LCDI/Chemocare/MedicineUtilisationReport_20-12-2018_09-36-24.xlsx") -> Chemocare

#Filter Chemocare to the patients in the Cohort and the appointment dates between 2015 and 2017

Chemocare$`Appointment Date` %>% as.Date(format="%d-%m-%Y") -> Chemocare$`Appointment Date`

Chemocare %>% filter(`Appointment Date`>="2015-01-01" & `Appointment Date`<="2017-12-31") -> Chemocare

#Filtering to contain only neo-adjuvant treatments
#Filter the Chemocare data to show only the patients from the cohort
#Filter for cytotoxic or biological modifiers - these are Chemotherapy treatments and targeted therapy 

Chemocare %>% filter(grepl('Neo-Adjuvant', Intention)) %>% 
                       filter(CHI %in% SMR06_BC_201516$UPI_NUMBER) %>% 
  filter(`Drug Type` %in% c("Cytotoxic","Biological Modifer")& `Drug Status` %in% c("Authorised","Given"))->Chemocare_cyto_bio


#The CHI numbers from the chemocare_cyto_bio dataset are the patients who fit the following criteria:
#Have been diagnosed with breast cancer between 2015 and 2016
#Have had surgery within 9 months of being diagnosed with early BC 
#Have not been diagnosed as metastatic before having surgery
#Are HER2 +ve
#Have had at least one round of neo-adjuvant SACT


Chemocare_cyto_bio %>%  select(CHI) %>% unique() -> Cohort 


#Filter the datasets to the cohort of interset

SMR06 %>% filter(UPI_NUMBER %in% Cohort$CHI) -> SMR06

SMR06_BC_201516 %>% filter(UPI_NUMBER %in% Cohort$CHI) -> SMR06_BC_201516

Chemocare %>% filter(CHI %in% Cohort$CHI) -> Chemocare 

Introduction

The methods in this report has been developed as part of the LCDI project, for information on further methodologies, data sources and definitions please see the Cancer Wiki page: https://www.wiki.ed.ac.uk/display/CAN

This report shows the methods and subsequent results from deriving patient characteristics and outcomes in 42 HER2 positive early breast cancer patients who have met the following criteria:

The outcomes reported are at an aggregate level where patient numbers lower than 5 are suppressed.

Please note that some of the patients in the cohort are from health boards other than NHS Lothian, this can mean that patients who received chemotherapy or surgery at another health board will not be included in the analysis.

Methods

Cohort derivation

Datasets

The data used to derive the cohort has been extracted from the following datasets:

  • SMR06
    • Diagnosis date
    • Date of birth
    • Sex
    • Ethnicity
    • Oestrogen Receptor Status
    • TNM Status
    • Date of first surgery
    • Date of first chemotherapy
  • Chemocare
    • Appointment date
    • Intention
    • Regime
    • Drug status
    • Drug type
  • SMR01
    • Admission date
    • Main Operation 1A
    • Main Operation 1B
    • Other operation 1A
    • Other operation 1B
    • Other operation 2A
    • Other operation 2B
    • Other operation 3A
    • Other operation 3B
  • Advanced Breast Cancer Nurse database
    • Date first seen - this is used as date of metastatic diagnosis

The cohort is derived by first using SMR06 to identify patients who had a diagnosis date (incidence date) between 01/01/2015 and 31/12/2016, a surgery flag of 1 with a date of first surgery within 9 months of their diagnosis date and a HER2 flag of 1.

Also using SMR06, records were checked to ensure there were no additional incidences of metastatic breast cancer using clinical stage M and pathological stage M variables.

Since recurrence is not recorded on SMR06 the advanced breast cancer nurse database is used to identify any patients that had developed metastatic breast cancer between diagnosis date and surgery.

Chemocare is used to identify patients who had received neo-adjuvant SACT between 2015 and 2017, this time period is picked to include data for patients diagnosed towards the end of 2016. Neo-adjuvant treatments can be identified by using the Intention variable, Chemotherapy is identified by searching for a drug type of “Cytotoxic” and targeted therapy is identified by searching for a drug type of “Biological Modifer”.

As nurses may not have time to change drug status from “authorised” to “given” at the end of a treatment; both are selected to identify treatments given to the patients in the cohort.

Customers are encouraged to think of the time period of interest for the patients to have received chemotherapy.

Patient Characteristics

Age

Age is derived by calculating the difference between the patients date of birth and the early breast cancer diagnosis date in days and converting to years. Both variables are found in SMR06.

#Age 

#There are also no patients who have more than one incidence of breast cancer occuring on different dates

floor(difftime(SMR06_BC_201516$INCIDENCE_DATE,SMR06_BC_201516$DATE_OF_BIRTH, units=c("days"))/365)->SMR06_BC_201516$Age_at_diagnosis

SMR06_BC_201516$Age_at_diagnosis %>%  as.numeric() -> SMR06_BC_201516$Age_at_diagnosis

SMR06_BC_201516$Age_inc_group<-cut(SMR06_BC_201516$Age_at_diagnosis,c(0,39,49,59,150))

SMR06_BC_201516$Age_inc_group %>% 
  fct_recode("Under 40" = "(0,39]",
             "40-49" = "(39,49]",
             "50-59" = "(49,59]",
             "60+" = "(59,150]") -> SMR06_BC_201516$Age_inc_group

SMR06_BC_201516 %>% group_by(Age_inc_group) %>% summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1))-> Age_at_diagnosis_sum

Age_at_diagnosis_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Age_inc_group Number of patients % of Cohort
Under 40 9 21.4
40-49 13 31.0
50-59 11 26.2
60+ 9 21.4

Sex

Sex is derived by using data from SMR06. 100% of patient’s in the cohort are female.

#Sex
SMR06_BC_201516 %>% group_by(SEX) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1))-> Sex_sum

Sex_sum$SEX %>% as.factor() %>% fct_recode("Female"="2") %>% as.character() ->Sex_sum$SEX

Sex_sum %>% rename(Sex=SEX) ->Sex_sum

Sex_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Sex Number of patients % of Cohort
Female 42 100

Ethnicity

Ethnicity has been derived using the Ethnic_group variable from SMR06. Low numbers have been suppressed.

SMR06_BC_201516 %>% group_by(ETHNIC_GROUP) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER))-> Ethnicity_sum

Ethnicity_sum$ETHNIC_GROUP %>% as.factor() %>% fct_recode("White-Scottish"="1A",
                                                          "White-Other British"="1B",
                                                          "Polish"="1L",
                                                          "Any other white ethnic background"="1Z",
                                                          "Pakistani, Pakistani Scottish or Pakistani British"="3F",
                                                          "Indian, Indian Scottish or Indian British"="3G",
                                                          "Other Asian, Asian Scottish or Asian British"="3Z",
                                                          "Refused/not provided by the patient"="98",
                                                          "Not known"="99") %>% as.character() ->Ethnicity_sum$ETHNIC_GROUP


Ethnicity_sum %>% rename(`Ethnic Group`=ETHNIC_GROUP) %>%  arrange(`Number of patients`)->Ethnicity_sum

ifelse(Ethnicity_sum$`Number of patients`<5,"\\*",Ethnicity_sum$`Number of patients`)->Ethnicity_sum$`Number of patients`

Ethnicity_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
Ethnic Group Number of patients
Polish *
Indian, Indian Scottish or Indian British *
Other Asian, Asian Scottish or Asian British *
Any other white ethnic background *
Refused/not provided by the patient *
Not known 6
White-Other British 7
White-Scottish 19
Note:
*Number of patients less than 5 have been suppressed

TNM Staging

TNM staging has been derived by combining the Clinical and Pathological staging variables in SMR06.

Customers should specify how specific they want the TNM staging to be, for simplicity and aggregation the letters have been removed from the variable e.g. T1a T1b T1c are combined to T1.

# General TNM Staging ---- 

# table(SMR06_BC_201516$STAGE_CLINICAL_T, useNA = "ifany")
# table(SMR06_BC_201516$STAGE_CLINICAL_N, useNA = "ifany")
# table(SMR06_BC_201516$STAGE_CLINICAL_M, useNA = "ifany")

#No missing clinical T,N,M 

#removing the letters off of the staging variables

gsub("[^0-9\\X]", "", SMR06_BC_201516$STAGE_CLINICAL_T) -> SMR06_BC_201516$STAGE_CLINICAL_T

gsub("[^0-9\\X]", "", SMR06_BC_201516$STAGE_CLINICAL_N) -> SMR06_BC_201516$STAGE_CLINICAL_N

gsub("[^0-9\\X]", "", SMR06_BC_201516$STAGE_PATHOLOGIC_T) ->SMR06_BC_201516$STAGE_PATHOLOGIC_T

gsub("[^0-9\\X]", "", SMR06_BC_201516$STAGE_PATHOLOGIC_N) ->SMR06_BC_201516$STAGE_PATHOLOGIC_N



#combining clinical and pathological staging----

SMR06_BC_201516$STAGE_CLINICAL_T ->SMR06_BC_201516$stage_T 
ifelse(is.na(SMR06_BC_201516$stage_T)==1 | SMR06_BC_201516$stage_T=="X",SMR06_BC_201516$STAGE_PATHOLOGIC_T,SMR06_BC_201516$STAGE_CLINICAL_T) %>% 
  as.factor()->SMR06_BC_201516$stage_T

SMR06_BC_201516$STAGE_CLINICAL_N ->SMR06_BC_201516$stage_N
ifelse(is.na(SMR06_BC_201516$stage_N)==1 | SMR06_BC_201516$stage_N=="X",SMR06_BC_201516$STAGE_PATHOLOGIC_N,SMR06_BC_201516$STAGE_CLINICAL_N) %>% 
  as.factor()->SMR06_BC_201516$stage_N

SMR06_BC_201516$STAGE_CLINICAL_M ->SMR06_BC_201516$stage_M
ifelse(is.na(SMR06_BC_201516$stage_M)==1 | SMR06_BC_201516$stage_M=="X",SMR06_BC_201516$STAGE_PATHOLOGIC_M,SMR06_BC_201516$STAGE_CLINICAL_M) %>% 
  as.factor()->SMR06_BC_201516$stage_M

SMR06_BC_201516$stage_T %>% 
  as.factor() %>% 
  fct_recode("X Cannot be assessed" = "X")->SMR06_BC_201516$stage_T

SMR06_BC_201516$stage_N %>% 
  as.factor() %>% 
  fct_recode("X Cannot be assessed" = "X")->SMR06_BC_201516$stage_N

SMR06_BC_201516$stage_M %>% 
  as.factor() %>% 
  fct_recode("X Cannot be assessed" = "X")->SMR06_BC_201516$stage_M



SMR06_BC_201516 %>% group_by(stage_T) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) -> Stage_T_sum

SMR06_BC_201516 %>% group_by(stage_N) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) -> Stage_N_sum

SMR06_BC_201516 %>% group_by(stage_M) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) -> Stage_M_sum

ifelse(Stage_N_sum$`Number of patients`<20,"\\*",Stage_N_sum$`Number of patients`) -> Stage_N_sum$`Number of patients`


ifelse(Stage_M_sum$`Number of patients`<5,"\\*",Stage_M_sum$`Number of patients`) -> Stage_M_sum$`Number of patients`


Stage_T_sum %>% rename(`Stage T`=stage_T) ->Stage_T_sum
Stage_N_sum %>% rename(`Stage N`=stage_N) ->Stage_N_sum
Stage_M_sum %>% rename(`Stage M`=stage_M) ->Stage_M_sum


Stage_T_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Stage T Number of patients
1 6
2 22
3 5
4 9
Stage_N_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 20 have been suppressed due to the small number of categories in the variable")
Stage N Number of patients
0 *
1 25
2 *
Note:
*Number of patients less than 20 have been suppressed due to the small number of categories in the variable
Stage_M_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
Stage M Number of patients
0 38
X Cannot be assessed *
Note:
*Number of patients less than 5 have been suppressed

Hormone Receptor status

Hormone receptor status is derived by using the oestrogen receptor status and progesterone receptor status variables from SMR06.

#Oestrogen receptor status
SMR06_BC_201516 %>% group_by(OESTROGEN_RECEPTOR_STATUS) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1))-> ER_status_sum


ER_status_sum$OESTROGEN_RECEPTOR_STATUS %>% as.factor() %>% fct_recode("Negative"="0",
                                           "Positive"="1") %>% as.character() ->ER_status_sum$OESTROGEN_RECEPTOR_STATUS

ER_status_sum %>% rename(`Oestrogen receptor status`=OESTROGEN_RECEPTOR_STATUS) ->ER_status_sum


#Progesterone receptor status 

SMR06_BC_201516 %>% group_by(PROGESTERONE_RECEPTOR_STATUS) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1))-> Pro_status_sum


Pro_status_sum$PROGESTERONE_RECEPTOR_STATUS %>% as.factor() %>% fct_recode("Negative"="0",
                                           "Positive"="1","Not known"="9") %>% as.character() ->Pro_status_sum$PROGESTERONE_RECEPTOR_STATUS

Pro_status_sum %>% rename(`Progesterone receptor status`=PROGESTERONE_RECEPTOR_STATUS) ->Pro_status_sum

ER_status_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Oestrogen receptor status Number of patients % of Cohort
Negative 18 42.9
Positive 24 57.1
Pro_status_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Progesterone receptor status Number of patients % of Cohort
Negative 25 59.5
Positive 9 21.4
Not known 8 19.0

Time from diagnosis to first chemotherapy

Time from diagnosis to first chemotherapy has been derived using the diagnosis date from SMR06 and date of first chemotherapy. A future potential analysis would be to investigate the difference between date of first chemotherapy from SMR06 and the date of the first chemotherapy in Chemocare. Customers are encouraged to think of how to define a patients first chemotherapy e.g. neo-adjuvant, dosage, Taxane, Anthracycline, time period

difftime(SMR06_BC_201516$DATE_1ST_CHEMO,SMR06_BC_201516$INCIDENCE_DATE,units=c("days"))->SMR06_BC_201516$Time_to_first_chemo
SMR06_BC_201516$Time_to_first_chemo %>% as.numeric()-> SMR06_BC_201516$Time_to_first_chemo

SMR06_BC_201516 %>% summarise(`Minimum`=min(Time_to_first_chemo,na.rm=TRUE),
                              `Median`=median(Time_to_first_chemo,na.rm=TRUE),
                              `Mean`=round2(mean(Time_to_first_chemo,na.rm=TRUE),1),
                              `Maximum`=max(Time_to_first_chemo,na.rm=TRUE))-> Time_to_first_chemo_sum

kable(Time_to_first_chemo_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c("Time to first chemotherapy (days)" = 4))
Time to first chemotherapy (days)
Minimum Median Mean Maximum
20 30 33 57

Time from diagnosis to first surgery

Time from diagnosis to first chemotherapy has been derived using the diagnosis date from SMR06 and date of first chemotherapy. A future potential analysis would be to investigate the difference between date of first surgery from SMR06 and the date of the first surgery in SMR01. Customers are encouraged to think of how to define a patient’s first surgery e.g. Time period, type of surgery etc.

difftime(SMR06_BC_201516$DATE_1ST_SURGERY,SMR06_BC_201516$INCIDENCE_DATE,units=c("days"))->SMR06_BC_201516$Time_to_first_surgery
SMR06_BC_201516$Time_to_first_surgery %>% as.numeric()-> SMR06_BC_201516$Time_to_first_surgery

SMR06_BC_201516 %>% summarise(`Minimum`=min(Time_to_first_surgery),
                              `Median`=median(Time_to_first_surgery),
                              `Mean`=round2(mean(Time_to_first_surgery),1),
                              `Maximum`=max(Time_to_first_surgery))-> Time_to_first_surgery_sum

kable(Time_to_first_surgery_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c("Time to first surgery (days)" = 4))
Time to first surgery (days)
Minimum Median Mean Maximum
63 175 176.6 258

Neo-adjuvant SACT Regime received

Neo-adjuvant SACT recevied shows the Neo-adjuvent Regimes with a drug status of “Given” or “Authorised” administered between 01/01/2015 and 31/12/2017.

Customers are encouraged to think of how they would like the Regimes aggregated if the patient numbers are small.

Chemocare %>% filter(grepl('Neo-Adjuvant', Intention)) %>%
  filter(CHI %in% SMR06_BC_201516$UPI_NUMBER) %>% 
  filter(`Drug Status` %in% c("Authorised", "Given")) %>% 
  group_by(Regime) %>%  summarise(`Number of patients`= n_distinct(CHI)) %>% 
  mutate(`% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1))-> Regime_received_sum

Regime_received_sum %>% arrange(`Number of patients`) -> Regime_received_sum

ifelse(Regime_received_sum$`Number of patients`<5,"\\*",Regime_received_sum$`Number of patients`)->Regime_received_sum$`Number of patients`
ifelse(Regime_received_sum$`Number of patients`=="\\*","\\*",Regime_received_sum$`% of Cohort`)->Regime_received_sum$`% of Cohort`

kable(Regime_received_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
Regime Number of patients % of Cohort
FEC-D (D) * *
FEC-D (FEC) * *
FEC 100 * *
FEC 80 * *
KAITLIN ARM 1 LD * *
KAITLIN ARM 1 MD * *
T ROSCO TRAST LD * *
T ROSCO TRAST SC * *
ZOLEND ADJUVANT * *
DOCETAXEL&CYCLO * *
EPI/CYCLO ADJ * *
DOCETAXEL&CARBO * *
PACLITAX WKLY 8 19
FEC-D NEO (D) 26 61.9
FEC-D NEO (FEC) 29 69
TRASTUZ MD 3WKLY 35 83.3
TRASTUZ LD 3WKLY 36 85.7
TRASTUZ SC 36 85.7
Note:
*Number of patients less than 5 have been suppressed

Type of surgery

Type of surgery has been defined by extracting inpatient records from SMR01 covering the time period of 01/01/2015 and 31/12/2017. The OPCS4 codes used to identify breast procedures can be found in the syntax for this report.

Customers should specify OPCS4 codes to avoid any ambiguity around the analysis.

# SMR01colpos <- read_excel("Z:/Oncology Data/Cancer LCDI/SMR/SMR01B col names pos example2.xlsx")
# 
# 
# SMR01B_from <- SMR01colpos$Position_From
# SMR01B_to<-SMR01colpos$Position_To
# SMR01B_names<-SMR01colpos$Field_name
# 
# SMR01B <- read_fwf("N:/Upload/Linked Data/Raw Data/S_mrl_smr01b.dat", fwf_positions(SMR01B_from,SMR01B_to,
#                                                                                      col_names = SMR01B_names),guess_max =500000)
# 
# SMR01B %>% filter(UPI_NUMBER %in% Cohort$CHI) ->SMR01B
# 
# SMR01B$ADMISSION_DATE %>% ymd() ->SMR01B$ADMISSION_DATE
# 
# 
# SMR01B %>% filter(ADMISSION_DATE>="2015-01-01" & ADMISSION_DATE<="2017-12-31") ->SMR01B
# 
# SMR01B %>% write_xlsx("SMR01_example2.xlsx")



read_xlsx("SMR01_example2.xlsx",guess_max = 2000) -> SMR01

left_join(SMR06_BC_201516,SMR01,by=c("UPI_NUMBER"="UPI_NUMBER")) -> SMR06_BC_201516

#Remove some variables that aren't of interest


#Read in OPCS4 codes

read_xlsx("opcs4_codes.xlsx")-> opcs4_codes

#Match code descriptions to operations

SMR06_BC_201516$MAIN_OP_A_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$MAIN_OPERATION_A,
                                                                  Code)])

SMR06_BC_201516$MAIN_OP_B_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$MAIN_OPERATION_B,
                                                                  Code)])

SMR06_BC_201516$OTHER_OP_1A_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_1_A,
                                                                  Code)])

SMR06_BC_201516$OTHER_OP_1B_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_1_B,
                                                                  Code)])


SMR06_BC_201516$OTHER_OP_2A_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_2_A,
                                                                  Code)])

SMR06_BC_201516$OTHER_OP_2B_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_2_B,
                                                                  Code)])


SMR06_BC_201516$OTHER_OP_3A_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_3_A,
                                                                  Code)])

SMR06_BC_201516$OTHER_OP_3B_desc <- with(opcs4_codes,Description[match(SMR06_BC_201516$OTHER_OPERATION_3_B,
                                                                  Code)])



#Filter for only breast codes

c("B00","B27","B278","B279","B28","B281","B282","B283","B284","B285","B286","B287","B288","B289","B29","B291","B292","B293","B294","B295","B298","B299","B30","B301","B302","B303","B304","B308","B309","B31","B318","B319","B32","B321","B322","B323","B328","B329","B33","B332","B333","B338","B339","B34","B348","B349","B37","B371","B372","B373","B374","B375","B378","B379","B38","B381","B382","B388","B389","B39","B391","B392","B393","B394","B395","B398","B399","B40","B401","B408","B409","S482","S493","U18","U182","U188","U189","Z15","Z151","Z152","Z153","Z154","Z155","Z158") -> Breast_opcs4_codes

##

SMR06_BC_201516 %>% filter(MAIN_OPERATION_A %in% Breast_opcs4_codes) %>% group_by(MAIN_OPERATION_A,MAIN_OP_A_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`) -> MAIN_OP_A_sum

ifelse(MAIN_OP_A_sum$`Number of patients`<5,"\\*",MAIN_OP_A_sum$`Number of patients`)-> MAIN_OP_A_sum$`Number of patients`
ifelse(MAIN_OP_A_sum$`Number of patients`=="\\*","\\*",MAIN_OP_A_sum$`% of Cohort`)-> MAIN_OP_A_sum$`% of Cohort`

kable(MAIN_OP_A_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
MAIN_OPERATION_A MAIN_OP_A_desc Number of patients % of Cohort
B278 OTHER SPECIFIED TOTAL EXCISION OF BREAST * *
B291 RECONSTR OF BREAST USING MYOCUTANEOUS FLAP OF LATISSIMUS DORSI MUSCLE * *
B301 INSERTION OF PROSTHESIS FOR BREAST * *
B303 REMOVAL OF PROSTHESIS FOR BREAST * *
B393 RECONSTR OF BREAST USING FREE DEEP INFERIOR EPIGASTRIC PERFORATOR FLAP * *
B284 RE-EXCISION OF BREAST MARGINS * *
B287 WIRE GUIDED EXCISION OF LESION OF BREAST * *
B282 PARTIAL EXCISION OF BREAST NEC * *
B375 LIPOFILLING OF BREAST 5 11.9
B279 UNSPECIFIED TOTAL EXCISION OF BREAST 8 19
B285 WIRE GUIDED PARTIAL EXCISION OF BREAST 18 42.9
Note:
*Number of patients less than 5 have been suppressed
##

SMR06_BC_201516 %>% filter(MAIN_OPERATION_B %in% Breast_opcs4_codes) %>% group_by(MAIN_OPERATION_B,MAIN_OP_B_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> MAIN_OP_B_sum

ifelse(MAIN_OP_B_sum$`Number of patients`<5,"\\*",MAIN_OP_B_sum$`Number of patients`)-> MAIN_OP_B_sum$`Number of patients`
ifelse(MAIN_OP_B_sum$`Number of patients`=="\\*","\\*",MAIN_OP_B_sum$`% of Cohort`)-> MAIN_OP_B_sum$`% of Cohort`

kable(MAIN_OP_B_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
MAIN_OPERATION_B MAIN_OP_B_desc Number of patients % of Cohort
B301 INSERTION OF PROSTHESIS FOR BREAST * *
Note:
*Number of patients less than 5 have been suppressed
##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_1_A %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_1_A,OTHER_OP_1A_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_1A_sum

ifelse(OTHER_OP_1A_sum$`Number of patients`<5,"\\*",OTHER_OP_1A_sum$`Number of patients`)-> OTHER_OP_1A_sum$`Number of patients`
ifelse(OTHER_OP_1A_sum$`Number of patients`=="\\*","\\*",OTHER_OP_1A_sum$`% of Cohort`)-> OTHER_OP_1A_sum$`% of Cohort`


kable(OTHER_OP_1A_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
OTHER_OPERATION_1_A OTHER_OP_1A_desc Number of patients % of Cohort
B284 RE-EXCISION OF BREAST MARGINS * *
B292 RECONSTRUCTION OF BREAST USING LOCAL FLAP OF SKIN NEC * *
B375 LIPOFILLING OF BREAST * *
B298 OTHER SPECIFIED RECONSTRUCTION OF BREAST * *
B291 RECONSTR OF BREAST USING MYOCUTANEOUS FLAP OF LATISSIMUS DORSI MUSCLE * *
Note:
*Number of patients less than 5 have been suppressed
##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_1_B %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_1_B,OTHER_OP_1B_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_1B_sum

ifelse(OTHER_OP_1B_sum$`Number of patients`<5,"\\*",OTHER_OP_1B_sum$`Number of patients`)-> OTHER_OP_1B_sum$`Number of patients`
ifelse(OTHER_OP_1B_sum$`Number of patients`=="\\*","\\*",OTHER_OP_1B_sum$`% of Cohort`)-> OTHER_OP_1B_sum$`% of Cohort`

kable(OTHER_OP_1B_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
OTHER_OPERATION_1_B OTHER_OP_1B_desc Number of patients % of Cohort
B301 INSERTION OF PROSTHESIS FOR BREAST * *
Note:
*Number of patients less than 5 have been suppressed
##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_2_A %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_2_A,OTHER_OP_2A_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_2A_sum

ifelse(OTHER_OP_2A_sum$`Number of patients`<5,"\\*",OTHER_OP_2A_sum$`Number of patients`)-> OTHER_OP_2A_sum$`Number of patients`
ifelse(OTHER_OP_2A_sum$`Number of patients`=="\\*","\\*",OTHER_OP_2A_sum$`% of Cohort`)-> OTHER_OP_2A_sum$`% of Cohort`


kable(OTHER_OP_2A_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote(general="*Number of patients less than 5 have been suppressed")
OTHER_OPERATION_2_A OTHER_OP_2A_desc Number of patients % of Cohort
B285 WIRE GUIDED PARTIAL EXCISION OF BREAST * *
B375 LIPOFILLING OF BREAST * *
Note:
*Number of patients less than 5 have been suppressed
##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_2_B %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_2_B,OTHER_OP_2B_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_2B_sum

ifelse(OTHER_OP_2B_sum$`Number of patients`<5,"\\*",OTHER_OP_2B_sum$`Number of patients`)-> OTHER_OP_2B_sum$`Number of patients`
ifelse(OTHER_OP_2B_sum$`Number of patients`=="\\*","\\*",OTHER_OP_2B_sum$`% of Cohort`)-> OTHER_OP_2B_sum$`% of Cohort`

#No data for other op 2b

##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_3_A %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_3_A,OTHER_OP_3A_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_3A_sum

ifelse(OTHER_OP_3A_sum$`Number of patients`<5,"\\*",OTHER_OP_3A_sum$`Number of patients`)-> OTHER_OP_3A_sum$`Number of patients`
ifelse(OTHER_OP_3A_sum$`Number of patients`=="\\*","\\*",OTHER_OP_3A_sum$`% of Cohort`)-> OTHER_OP_3A_sum$`% of Cohort`

#No data for other op 3a

##

SMR06_BC_201516 %>% filter(OTHER_OPERATION_3_B %in% Breast_opcs4_codes) %>% group_by(OTHER_OPERATION_3_B,OTHER_OP_3B_desc) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER),
            `% of Cohort` =round2(`Number of patients`/n_distinct(Cohort$CHI)*100,1)) %>% arrange(`Number of patients`)-> OTHER_OP_3B_sum

ifelse(OTHER_OP_3B_sum$`Number of patients`<5,"\\*",OTHER_OP_3B_sum$`Number of patients`)-> OTHER_OP_3B_sum$`Number of patients`
ifelse(OTHER_OP_3B_sum$`Number of patients`=="\\*","\\*",OTHER_OP_3B_sum$`% of Cohort`)-> OTHER_OP_3B_sum$`% of Cohort`

#No data for other op 3b