Introduction

The methods in this report has been developed as part of the LCDI project, for information on further methodologies, data sources and definitions please see the Cancer Wiki page: https://www.wiki.ed.ac.uk/display/CAN

This report shows the methods and subsequent results from deriving patient characteristics in 277 Secondary breast cancer (SBC) patients diagnosed between 2013 and 2017.

SBC patients were identified through the Secondary Breast Cancer CNS Database which is maintained by the cancer nurses. This dataset comes in two formats from the Western General and St. John’s hospitals, the data are manually inputted and substantial data cleaning is required before reading the data into any analytical software.

There may also be typos in Date of Birth, Date of Metastases/Date Seen and also the patient identifier which appears to be Case Reference Number (CRN). In order to get the CHI, the data has to be linked to other sources by CRN. In cases where the CRN has not matched check against name and date of birth in SMR06, there may be a typo in the CRN.

“Date seen” in the data from the Western General is considered to be the date of Metastases.

The datasets used to determine patient characteristics are:

When using the chemotherapy table from the SESCD there may be drugs that are not chemotherapy such as Herceptin, please make sure to filter out drugs that are not of interest.

Data_sources_patients %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Data Source Number of Patients (%)
SESCD Main 243 87.73
SESCD hormone 243 87.73
SESCD chemotherapy 129 46.57
SMR06 270 97.47
Chemocare 197 71.12
PIS 254 91.70
NRS Deaths 138 49.82

Age at diagnosis

#Age at diagnosis of Metastases ----
#SMR06

SMR06 %>% filter(is.na(DATE_OF_BIRTH)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->Missing_DOB


# Six dates of birth are missing, infer them from CHI/UPI NUMBER

ifelse(is.na(SMR06$DATE_OF_BIRTH),paste(substr(SMR06$UPI_NUMBER, start = 5, stop = 6),substr(SMR06$UPI_NUMBER, start = 3, stop = 4),substr(SMR06$UPI_NUMBER, start = 1, stop = 2),sep="/"),"") ->SMR06$DATE_OF_BIRTH1

#All of the DOB are from 19XX
#SMR06 %>% filter(is.na(DATE_OF_BIRTH)==1) %>% select(UPI_NUMBER)
#Changing the start of the year to be 19 otherwise it will default to 20XX.

format(as.Date(SMR06$DATE_OF_BIRTH1,"%y/%m/%d"),"19%y/%m/%d") %>% as.Date()->SMR06$DATE_OF_BIRTH1


ifelse(is.na(SMR06$DATE_OF_BIRTH)==1,SMR06$DATE_OF_BIRTH1,SMR06$DATE_OF_BIRTH) ->SMR06$DATE_OF_BIRTH

as.Date(SMR06$DATE_OF_BIRTH,origin="1970-01-01") -> SMR06$DATE_OF_BIRTH

difftime(SMR06$Met_inc_date,SMR06$DATE_OF_BIRTH,units="days") ->SMR06$Age_at_mets

floor(SMR06$Age_at_mets/365) %>% as.character() %>% as.numeric() ->SMR06$Age_at_mets

Age at diagnosis is defined by calculating the difference in time in days between the date of metastatic diagnosis from the SBC database and the Date of Birth. The days are then divided by 365 and rounded down to convert to years.

Initally date of birth (DOB) is taken from SMR06 but 7 patients are missing a DOB. The DOB for these patients are inferred from the first six digitis of the CHI number.

The youngest patient in the cohort is 31, the oldest patient is 91.

SMR06 %>% group_by(Age_at_mets) %>% 
  summarise(N_pats = n_distinct(UPI_NUMBER)) %>% ungroup()->Age_at_mets_sum

  plot_ly(x=Age_at_mets_sum$`Age_at_mets`,y=Age_at_mets_sum$N_pats, type="bar",height=700,hoverinfo = 'text', marker = list(color = 'rgb(158,202,225)',
                      line = list(color = 'rgb(8,48,107)',
                                  width = 1.5)),
          text = ~paste('Age:',Age_at_mets_sum$Age_at_mets,
                        '<br> Number of Patients:', Age_at_mets_sum$N_pats)) %>%
  layout(title = "Age at Metastatic Diagnosis in Breast Cancer patients diagnosed between 2013 and 2017",
         xaxis = list(range=c(30,95),ticks="outside",title="Patient Age"),
         yaxis = list(ticks="outside",title="Number of Patients"))
SMR06 %>% filter(`Line of Treatment`=="1st")%>% group_by(Age_at_mets,Treatment_category) %>% 
  summarise(N_pats = n_distinct(UPI_NUMBER)) %>% ungroup()->Age_at_mets_sum_treat


Age_at_mets_sum_treat %>%  spread(key=Treatment_category,N_pats) ->Age_at_mets_sum_treat


 
Age_at_mets_sum_treat_plot <- plot_ly(Age_at_mets_sum_treat, x = ~Age_at_mets, y = ~Endocrine, type = 'bar', name = 'Endocrine',height=700,text= ~paste("1st Line: Endocrine",
    '<br> Age:',Age_at_mets_sum_treat$Age_at_mets,
    '<br> Number of Patients:', Age_at_mets_sum_treat$Endocrine),hoverinfo='text', marker = list(color = "#41b6c4",line = list(color = 'black',
                                  width = 1.5))) %>%
  add_trace(y = ~Chemotherapy, name = 'Chemotherapy',text= ~paste("1st Line: Chemotherapy",
    '<br> Age:',Age_at_mets_sum_treat$Age_at_mets,
    '<br> Number of Patients:', Age_at_mets_sum_treat$Chemotherapy),hoverinfo='text', marker = list(color = "#253494",line = list(color = 'black',
                                  width = 1.5))) %>%
  add_trace(y = ~`No treatment`, name = 'No treatment',text= ~paste("1st Line: No treatment",
    '<br> Age:',Age_at_mets_sum_treat$Age_at_mets,
    '<br> Number of Patients:', Age_at_mets_sum_treat$`No treatment`),hoverinfo='text',marker = list(color = "#a1dab4",line = list(color = 'black',
                                  width = 1.5))) %>% 
  add_trace(y = ~Death, name= 'Death',text= ~paste("1st Line: Death",
    '<br> Age:',Age_at_mets_sum_treat$Age_at_mets,
    '<br> Number of Patients:', Age_at_mets_sum_treat$Death),hoverinfo='text',marker = list(color = "#ffffcc",line = list(color = 'black',
                                  width = 1.5))) %>% 
  add_trace(y = ~`Trial/other`, name = 'Trial/other',text= ~paste("1st Line: Trial/other",
    '<br> Age:',Age_at_mets_sum_treat$Age_at_mets,
    '<br> Number of Patients:', Age_at_mets_sum_treat$`Trial/other`),hoverinfo='text',marker = list(color = "#2c7fb8",line = list(color = 'black',
                                  width = 1.5))) %>% 
  layout(title = "Age at Metastatic Diagnosis in Breast Cancer patients diagnosed between 2013 and 2017 by 1st line of treatment",
         xaxis = list(range=c(30,95),ticks="outside",title="Patient Age"),
         yaxis = list(ticks="outside",title="Number of Patients"),
         barmode='stack',legend = list(orientation = "h",   # show entries horizontally
                     xanchor = "center",  # use center of legend as anchor
                     x = 0.5)) 

Age_at_mets_sum_treat_plot

Presentation - De Novo/Recurrent

#Presentation -  De novo/recurrent ---- 
#Recurrent
#Patients who have a record for breast cancer before their metastatic date are recurrent 
#It appears that the date of metastatic incidence can be out of sync by a few days compared to the SMR06 incidence date
#If Day/month/year format is used then patients who are denovo will be considered to be recurrent 

#6 missing incidence dates

#SMR06 %>% filter(is.na(INCIDENCE_DATE==1)) %>% select(UPI_NUMBER) %>% n_distinct()
  
SMR06 %>% filter(INCIDENCE_DATE<Met_inc_date)->Recurrent_SMR06

#Calculating the difference in time between the incidence date in SMR06 and the incidence date in the Nurse Dataset 
difftime(Recurrent_SMR06$Met_inc_date,Recurrent_SMR06$INCIDENCE_DATE,units=c("weeks"))->Recurrent_SMR06$timediff

#Filtering for metastatic records only to see the time difference between nurse dataset and SMR06 
Recurrent_SMR06 %>% filter(STAGE_CLINICAL_M==1|STAGE_PATHOLOGIC_M==1) %>% 
  mutate(Metastatic_timediff=as.numeric(difftime(Met_inc_date,INCIDENCE_DATE,units=c("days")))) %>% 
  select(UPI_NUMBER,STAGE_CLINICAL_M,STAGE_PATHOLOGIC_M,INCIDENCE_DATE,Met_inc_date,`Line of Treatment`,Treatment_category,Treatment_category2,Metastatic_timediff) %>% 
  as.data.frame()-> SMR06_metastatic


Recurrent_SMR06 %>% filter(STAGE_CLINICAL_M!=1 & STAGE_PATHOLOGIC_M!=1| is.na(STAGE_CLINICAL_M)==1|is.na(STAGE_PATHOLOGIC_M)==1) %>% 
  mutate(Non_Metastatic_timediff=as.numeric(difftime(Met_inc_date,INCIDENCE_DATE,units=c("weeks")))) %>% 
  select(UPI_NUMBER,STAGE_CLINICAL_M,STAGE_PATHOLOGIC_M,INCIDENCE_DATE,Met_inc_date,`Line of Treatment`,Treatment_category,Treatment_category2,Non_Metastatic_timediff) %>% 
  as.data.frame()-> SMR06_non_metastatic


SMR06_non_metastatic %>% filter(Non_Metastatic_timediff<=13) %>% select(UPI_NUMBER) %>% n_distinct() -> SMR06_M0_in13weeks

Recurrent patients are those who have a record of early breast cancer (Stage M 0) in SMR06 before their record of metastasis.

Please note that the Metastasis incidence date on the Secondary Breast Cancer Database can be a few days behind the incidence date for a metastatic record on SMR06 meaning that simply taking any record for breast cancer before the date of metastasis can mean patients who are de novo are classified instead as recurrent.

Instead use the Clinical and Pathological M staging variables to exclude records that are M stage 1.

There are 78 (28.2%) patients who have an SMR06 record that either has a Clinical Stage M of 1 or a Pathological Stage M of 1 occuring before their date of Metastasis on the SBC Nurse database.

The time difference between these records has a minimum of 1 days, a maximum of 1213 days and a median of 24 days.

There are 7 (2.53%) patients who have a Stage M 0 breast cancer record within three months before being diagnosis as metastatic. It is likely that these patients are metastatic and perhaps the records haven’t been updated on SMR06, these records are therefore not included in the derivation recurrent/de novo status.

Therefore patients who are defined as recurrent are those who have a record of early breast cancer - i.e. Stage M 0 - occuring more than three months before the date of metastatic diagnosis.

#Going to assume that records where the met inc date and inc date on smr06 are the same record and unlikely to be two different tumours
#filter out these records and any other records that remain are considered to be recurrent records
#Also filter out patients who are clinical stage M=1 or Pathological stage M=1
#a quick look shows there are no pathological stage m=1 

Recurrent_SMR06 %>% filter(timediff>13) %>% 
  filter(STAGE_CLINICAL_M!=1|is.na(STAGE_CLINICAL_M)==1)-> Recurrent_SMR06

n_distinct(Recurrent_SMR06$UPI_NUMBER) -> Total_recurrent_BC_pats 



#De novo - Patients who do not have a record for advanced breast cancer before their metastatic date

#There will be patients who are might have more than once incidence date before and after date of metastatic diagnosis. 
#Remove the patients who also have been diagnosed before mets because these are not DeNovo patients 
anti_join(SMR06,Recurrent_SMR06,by="UPI_NUMBER") ->DeNovo_SMR06

DeNovo_SMR06 %>% select(UPI_NUMBER) %>%  n_distinct()->Total_denovo_BC_pats




data.frame(c("Recurrent","De novo"),c(Total_recurrent_BC_pats,Total_denovo_BC_pats)) -> Recurrent_denovo_summary

colnames(Recurrent_denovo_summary) <- c("Breast Cancer Patient Type","Number of patients")

Recurrent_denovo_summary %>% mutate (`% of Adv BC patients`=round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,2)) ->Recurrent_denovo_summary

Recurrent_denovo_summary$`Breast Cancer Patient Type` %>% as.character() ->Recurrent_denovo_summary$`Breast Cancer Patient Type`

## Recurrent/ De novo by first line of treatment 

#Recurrent patients by first line of treatment 
Recurrent_SMR06 %>% filter(`Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% summarise(`recurrent_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`recurrent_% of Adv BC patients`=round2(`recurrent_Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,2)) %>% rename("Treatment category"=Treatment_category) ->recurrent_bc_firstline


recurrent_bc_firstline %>% bind_rows(recurrent_bc_firstline %>% 
                      summarise_if(is.numeric, sum) %>% 
                      mutate(`Treatment category`="Total")) ->recurrent_bc_firstline


#DeNovo patients by first line of treatment 
DeNovo_SMR06 %>% filter(`Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% summarise(`de novo_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`de novo_% of Adv BC patients`=round2(`de novo_Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,2))%>% rename("Treatment category"=Treatment_category) -> denovo_bc_firstline


denovo_bc_firstline %>% bind_rows(denovo_bc_firstline %>% 
                      summarise_if(is.numeric, sum) %>% 
                      mutate(`Treatment category`="Total")) ->denovo_bc_firstline

#Combining recurrent denovo by type of first line treatment

left_join(recurrent_bc_firstline,denovo_bc_firstline) ->Recurrent_denovo_summary_1stline

After filtering out records occurring less than three months before metastatic diagnosis there are 174 (62.82%) patients who are considered as recurrent patients.

De novo patients are those who do not have a record of recurrence, of which there are 103 (37.18%) patients.

rbind(Recurrent_denovo_summary,c("Total",colSums(Recurrent_denovo_summary[,-1])))-> Recurrent_denovo_summary

Recurrent_denovo_summary %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left")
Breast Cancer Patient Type Number of patients % of Adv BC patients
Recurrent 174 62.82
De novo 103 37.18
Total 277 100

Recurrent/ De novo patients by 1st line of treatment

library(english)
library(tools)

Recurrent_denovo_summary_1stline = Recurrent_denovo_summary_1stline %>% 
  set_names(gsub(".*_","", names(.)))

kable(Recurrent_denovo_summary_1stline, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Recurrent" = 2, "De novo" = 2)) %>% 
  row_spec(nrow(Recurrent_denovo_summary_1stline),bold=T) 
Recurrent
De novo
Treatment category Number of patients % of Adv BC patients Number of patients % of Adv BC patients
Chemotherapy 37 13.36 24 8.66
Death 12 4.33 3 1.08
Endocrine 119 42.96 70 25.27
No treatment 4 1.44 6 2.17
Trial/other 2 0.72 NA NA
Total 174 62.81 103 37.18

Prior Chemotherapy Y/N

#Prior Chemotherapy Y/N ----
#SMR06

#Patients missing the Chemotherapy flag 


Recurrent_SMR06 %>% filter(is.na(CHEMO)==1) %>%
 select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,Treatment_category,CHEMO,DATE_1ST_CHEMO) %>%
 mutate(timediff=difftime(Met_inc_date,DATE_1ST_CHEMO,units=c("weeks"))) %>% 
  filter(timediff>0) %>% 
  select(UPI_NUMBER) %>% n_distinct()->Prior_chemo_SMR06 ->NA_SMR06_CHEMO_pats



Recurrent_SMR06 %>% filter(CHEMO==1) %>%
 select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,Treatment_category,CHEMO,DATE_1ST_CHEMO) %>%
 mutate(timediff=difftime(Met_inc_date,DATE_1ST_CHEMO,units=c("weeks"))) %>% 
  filter(timediff>0)->Prior_chemo_SMR06

n_distinct(Prior_chemo_SMR06$UPI_NUMBER) ->Prior_Chemo_SMR06_pats

#Onc_Chemo
# joining with recurrent bc patients to get cheotherapy treatments occuring before date of metastatic diagnosis. 

left_join(Recurrent_SMR06,Onc_Main_Chemo, by=c("UPI_NUMBER","Met_inc_date","Line of Treatment","Treatment_category")) %>% 
  mutate(timediff_onc=difftime(Met_inc_date,STDATE,units=c("weeks"))) %>% 
  filter(is.na(timediff_onc)==0) %>% 
  filter(timediff_onc>0) %>% 
  select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,Treatment_category,timediff_onc,STDATE,ENDDATE,METHOD,MATERIAL)-> Prior_chemo_Onc

n_distinct(Prior_chemo_Onc$UPI_NUMBER)-> Prior_chemo_Onc_pats


anti_join(Prior_chemo_Onc,Prior_chemo_SMR06,by="UPI_NUMBER") %>% select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_chemo_Onc_notSMR06_pats

anti_join(Prior_chemo_SMR06,Prior_chemo_Onc,by="UPI_NUMBER") %>% 
  select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_chemo_SMR06_notOnc_pats




#full_join(Prior_chemo_Onc,Prior_chemo_SMR06,by=c("UPI_NUMBER","Met_inc_date")) %>%
#  summarise(Number_of_pats=n_distinct(UPI_NUMBER))

full_join(Prior_chemo_Onc,Prior_chemo_SMR06,by=c("UPI_NUMBER","Met_inc_date")) %>% 
  select(UPI_NUMBER,Met_inc_date) %>% 
  distinct(UPI_NUMBER, .keep_all=TRUE) ->Prior_chemo_pats


Recurrent_SMR06 %>% mutate(`Prior Chemotherapy` = ifelse(UPI_NUMBER %in% Prior_chemo_pats$UPI_NUMBER,"Yes","No")) %>% 
  select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,Treatment_category,CHEMO,DATE_1ST_CHEMO,`Prior Chemotherapy`,HORM_THERAPY,DATE_1ST_HORM) ->Recurrent_SMR06

  Recurrent_SMR06 %>% 
  group_by(`Prior Chemotherapy`) %>% 
  summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of recurrent patients`=round2(`Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2))-> Prior_chemo_YN_summary

rbind(Prior_chemo_YN_summary,c("Total",colSums(Prior_chemo_YN_summary[,-1])))-> Prior_chemo_YN_summary



## Prior Chemo Y/N by first line of treatment 

Recurrent_SMR06 %>% filter(`Prior Chemotherapy` == "Yes" & `Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% 
  summarise(`Yes_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`Yes_% of recurrent patients`=round2(`Yes_Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2)) -> Prior_chemo_Y_summary

Recurrent_SMR06 %>% filter(`Prior Chemotherapy` == "No" & `Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% 
  summarise(`No_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`No_% of recurrent patients`=round2(`No_Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2)) -> Prior_chemo_N_summary

full_join(Prior_chemo_Y_summary,Prior_chemo_N_summary) %>% 
  rename(`1st line treatment`="Treatment_category")-> Prior_chemo_YN_1st_summary


Prior_chemo_YN_1st_summary %>% 
  set_names(gsub(".*_","", names(.))) -> Prior_chemo_YN_1st_summary 

Prior chemotherapy in recurrent breast cancer patients is derived using the CHEMO flag in SMR06 and chemotherapy records for breast cancer only in the SESCD occuring before the date of metastatic diagnosis. The SESCD is used to infer prior chemotherapy status as it has historical chemotherapy data, Chemocare only has data from the latter of 2012 onwards.

There are 0 patients who have a missing CHEMO flag from records in SMR06 occuring before the date of metastatic diagnosis.

First recurrent patients that have a CHEMO flag of 1 in SMR06 are identifed, similarly recurrent patients are identifed in the Chemotherapy table from the SESCD.

There are 98 (56.32%) recurrent patients who were found to have prior chemotherapy using the SMR06 database.

Using the SESCD 97 (55.75%) recurrent patients were found to have a record of prior chemotherapy.

There are 3 (1.72%) patients who were found with a record for prior chemotherapy in the SESCD that were not found in SMR06, there were 4 (2.3%) patients who had a record for prior chemotherapy in SMR06 but not the SESCD.

The prior chemotherapy patients from SMR06 and the SESCD are combined to give a final Prior Chemotherapy Y/N flag.

Prior_chemo_YN_summary %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Prior Chemotherapy Number of patients % of recurrent patients
No 73 41.95
Yes 101 58.05
Total 174 100

Prior Chemotherapy by 1st line of treatment

kable(Prior_chemo_YN_1st_summary, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Prior Chemo" = 2, "No Prior Chemo" = 2))
Prior Chemo
No Prior Chemo
1st line treatment Number of patients % of recurrent patients Number of patients % of recurrent patients
Chemotherapy 31 17.82 6 3.45
Death 6 3.45 6 3.45
Endocrine 61 35.06 58 33.33
No treatment 1 0.57 3 1.72
Trial/other 2 1.15 NA NA

Prior Chemotherapy Type

#Prior Chemotherapy type from SESCD only----

# Prior_chemo_type_Onc_pats$MATERIAL %>% as.factor() %>% 
#   fct_recode("Epirubicin" = "351",
#              "Doxorubicin (Adriamycin)" = "355",
#              "CMF – Cyclophosphamide, Methotrexate, 5FU" = "382",
#              "Epirubicin, Cyclophosphamide, 5FU" = "384",
#              "EC – Epirubicin, Cyclophosphamide"="388",
#              "Taxotere – Docetaxol"="847",
#              "Taxol – Paclitaxel"="848",
#              "Other"="899") %>% 
#   as.character() -> Prior_chemo_type_Onc_patsMATERIAL

#Prior_chemo_Onc$MATERIAL %>% table(useNA="always")

#Cannot infer what codes 001, 399 and 899 would be as they are combinations of drugs 

Prior_chemo_Onc$MATERIAL %>% as.factor() %>% 
  fct_recode("Combination unknown" = "001",
             "Non-Anthracycline/Non-Taxane" = "133",
             "Anthracycline" = "351",
             "Anthracycline" = "355",
             "Non-Anthracycline/Non-Taxane" = "382",
             "Anthracycline" = "384",
             "Anthracycline" ="388",
             "Anthracycline" = "484",
             "Anthracycline"="488",
             "Taxane"="826",
             "Non-Anthracycline/Non-Taxane"="828",
             "Taxane"="847",
             "Taxane"="848",
             "Combination unknown" = "899") %>% 
  as.character() -> Prior_chemo_Onc$MATERIAL_1


#Combinations
#Antha + nonAnthra/nonTax = Other
#Anthra + tax = Anthra + tax
#Antra = antra
#tax + nonAnthra/nonTax = Other
#Nonanthra/nontax - nonanthra/nontax 
#None


#Combining the treatments for each patient to create a final prior chemotherpay type 
#Ignore the "Vectorizing 'glue' elements may not preserve their attributes" warning

Prior_chemo_Onc %>% distinct(UPI_NUMBER,MATERIAL_1) -> Prior_chemo_Onc_temp

Prior_chemo_Onc_temp %>% arrange(MATERIAL_1) ->Prior_chemo_Onc_temp

suppressWarnings(Prior_chemo_Onc_temp %>% group_by(UPI_NUMBER) %>% 
  mutate(MATERIAL_2=glue_collapse(MATERIAL_1,sep=" "))) %>% 
  select(UPI_NUMBER,MATERIAL_2) -> Prior_chemo_Onc_temp

full_join(Prior_chemo_Onc,Prior_chemo_Onc_temp) ->Prior_chemo_Onc

rm(Prior_chemo_Onc_temp)

#The assumption is made that the combination codes are not included as there is no way to tell what it is. 
#i.e. if a patient has a taxane therapy and an 899 then the patients prior chemo classification will be taxane 


ifelse(grepl("Anthracycline ",Prior_chemo_Onc$MATERIAL_2)==TRUE & grepl(" Taxane",Prior_chemo_Onc$MATERIAL_2)==TRUE,"Anthracycline + Taxane",
       ifelse(grepl("Anthracycline ",Prior_chemo_Onc$MATERIAL_2)==TRUE | grepl("Anthracycline",Prior_chemo_Onc$MATERIAL_2)==TRUE & grepl("Non",Prior_chemo_Onc$MATERIAL_2)==FALSE,"Anthracycline",
       ifelse(grepl("Taxane ",Prior_chemo_Onc$MATERIAL_2)==TRUE | grepl("Taxane",Prior_chemo_Onc$MATERIAL_2)==TRUE & grepl("Non",Prior_chemo_Onc$MATERIAL_2)==FALSE,"Taxane",
       ifelse(grepl("Non-Anthracycline/Non-Taxane",Prior_chemo_Onc$MATERIAL_2)==TRUE,"Non-Anthracycline/Non-Taxane",
              ifelse(grepl("Combination unknown", Prior_chemo_Onc$MATERIAL_2)==TRUE,"Combination unknown",NA)))))->Prior_chemo_Onc$`Chemotherapy type`


full_join(Prior_chemo_pats,Prior_chemo_Onc)->Prior_chemo_Onc

Prior_chemo_Onc %>% group_by(`Chemotherapy type`) %>% 
  summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior chemotherapy patients` = round2(`Number of patients`/n_distinct(Prior_chemo_pats$UPI_NUMBER)*100,2)) ->Prior_chemo_type_Onc_sum

rbind(Prior_chemo_type_Onc_sum,c("Total",colSums(Prior_chemo_type_Onc_sum[,-1])))-> Prior_chemo_type_Onc_sum

Prior chemotherapy type is dervied by using both the SESCD and Chemocare, the SESCD covers historical chemotherapies as Chemocare was introduced in the latter half of 2012.

Prior Chemotherapy Type - SESCD

Prior Chemotherapy Type is dervied by classifing the MATERIAL variable into the following categories:

  • Anthracycline
  • Taxane
  • Anthracycline + Taxane
  • Non-Anthracycline/Non-Taxane
  • Unknown Combination

If a patient receives a combination of Anthracycline and Non-Anthracycline/Non-Taxane the Chemotherapy Type is classified as Anthracycline, similarly for Taxane combinations.

If a patient receives both Anthracycline and Taxane the Chemotherapy Type is classified as Anthracycline + Taxane. Unknown combination is derived from the SESCD where there are existing categories which indicate a various combination of drugs without any specificity.

Prior_chemo_type_Onc_sum %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Chemotherapy type Number of patients % of prior chemotherapy patients
Anthracycline 29 28.71
Anthracycline + Taxane 61 60.4
Combination unknown 1 0.99
Non-Anthracycline/Non-Taxane 2 1.98
Taxane 4 3.96
NA 4 3.96
Total 101 100

Prior Chemotherapy type - Chemocare

#type of chemotherapy from chemocare post 2012
#Join the prior chemo patients from SMR06 and SESCD 

left_join(Prior_chemo_pats,Chemocare, by=c("UPI_NUMBER"="CHI")) %>% 
  select(UPI_NUMBER,Met_inc_date,`Appointment Date`,`Drug Type`,`Drug Name`,Regime) %>% 
  mutate(timediff=difftime(Met_inc_date,`Appointment Date`,units=c("weeks"))) %>% 
  filter(timediff>0 & `Drug Type`=="Cytotoxic") -> Prior_Chemo_type_ChemoC


#Prior_Chemo_type_ChemoC$Regime %>% table(useNA = "always")

Prior_Chemo_type_ChemoC$Regime %>% as.factor() %>% 
  fct_recode("Non-Anthracycline/Non-Taxane"="CMF ADJ",
             "Non-Anthracycline/Non-Taxane"="CAPECITABINE LD",
             "Anthracycline"="EPI/CYCLO ADJ",
             "Anthracycline"="FEC 100",
             "Anthracycline"="FEC 80",
             "Anthracycline + Taxane"="FEC-D (D)",
             "Anthracycline + Taxane"="FEC-D (FEC)",
             "Anthracycline + Taxane"="FEC-D NEO (D)",
             "Anthracycline + Taxane"="FEC-D NEO (FEC)",
             "Anthracycline" ="EPI/CYCLO ADJ",
             "Anthracycline" ="EPI/CYCLO MET",
             "Taxane"="PACLITAX WKLY",
             "Taxane"="DOCETAXEL&CYCLO")->Prior_Chemo_type_ChemoC$Regime1




Prior_Chemo_type_ChemoC %>% distinct(UPI_NUMBER,Regime1) -> Prior_Chemo_type_ChemoC_temp

suppressWarnings(Prior_Chemo_type_ChemoC_temp %>% group_by(UPI_NUMBER) %>% 
  mutate(Regime2=glue_collapse(Regime1,sep=" "))) %>% 
  select(UPI_NUMBER,Regime2) -> Prior_Chemo_type_ChemoC_temp

full_join(Prior_Chemo_type_ChemoC,Prior_Chemo_type_ChemoC_temp) ->Prior_Chemo_type_ChemoC

rm(Prior_Chemo_type_ChemoC_temp)

#The assumption is made that the combination codes are not included as there is no way to tell what it is. 
#i.e. if a patient has a taxane therapy and an 899 then the patients prior chemo classification will be taxane 


ifelse(grepl("Anthracycline ",Prior_Chemo_type_ChemoC$Regime2)==TRUE & grepl(" Taxane",Prior_Chemo_type_ChemoC$Regime2)==TRUE,"Anthracycline + Taxane",
       ifelse(grepl("Anthracycline ",Prior_Chemo_type_ChemoC$Regime2)==TRUE | grepl("Anthracycline",Prior_Chemo_type_ChemoC$Regime2)==TRUE & grepl("Non",Prior_Chemo_type_ChemoC$Regime2)==FALSE,"Anthracycline",
       ifelse(grepl("Taxane ",Prior_Chemo_type_ChemoC$Regime2)==TRUE | grepl("Taxane",Prior_Chemo_type_ChemoC$Regime2)==TRUE & grepl("Non",Prior_Chemo_type_ChemoC$Regime2)==FALSE,"Taxane",
       ifelse(grepl("Non-Anthracycline/Non-Taxane",Prior_Chemo_type_ChemoC$Regime2)==TRUE,"Non-Anthracycline/Non-Taxane",
              ifelse(grepl("Combination unknown", Prior_Chemo_type_ChemoC$Regime2)==TRUE,"Combination unknown",NA)))))->Prior_Chemo_type_ChemoC$`Chemotherapy type`



full_join(Prior_chemo_pats,Prior_Chemo_type_ChemoC) ->Prior_Chemo_type_ChemoC

Prior_Chemo_type_ChemoC %>% group_by(`Chemotherapy type`) %>% 
  summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior Chemotherapy patients`=round2(`Number of patients`/n_distinct(Prior_chemo_pats$UPI_NUMBER)*100,2)) -> Prior_chemo_type_ChemoC_summary

rbind(Prior_chemo_type_ChemoC_summary,c("Total",colSums(Prior_chemo_type_ChemoC_summary[,-1])))-> Prior_chemo_type_ChemoC_summary

Patients that were identifed as prior chemotherapy patients are those who have records occuring before the date of metastatic diagnosis in Chemocare.

Prior_chemo_type_ChemoC_summary %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Chemotherapy type Number of patients % of prior Chemotherapy patients
Anthracycline 2 1.98
Anthracycline + Taxane 24 23.76
Taxane 5 4.95
NA 70 69.31
Total 101 100
#filtering the SESCD chemotherapy information to just the patients found in chemocare shows that the SESCD
#shows that these patients are already classified as Anthracycline + Taxane patients
#Therefore combining the two will make no difference
#Prior chemotherapy SESCD and chemocare combined 
full_join(Prior_Chemo_type_ChemoC,Prior_chemo_Onc, by=c("UPI_NUMBER")) %>% select(UPI_NUMBER,`Chemotherapy type.x`,`Appointment Date`,`Chemotherapy type.y`,STDATE) -> Prior_chemo_type_comb

ifelse(Prior_chemo_type_comb$`Chemotherapy type.x`==Prior_chemo_type_comb$`Chemotherapy type.y`,0,1)-> Prior_chemo_type_comb$change


#By taking the UPI numbers which have a change between SESCD and chemocare and joining them back into the dataset you can see how many patients have a record that has changed and one that has not. Which is 0. 
#Prior_chemo_type_comb %>% filter(change==1) %>% left_join(Prior_chemo_type_comb) %>% view()

#Final prior chemo and prior chemo type ----
#Combine the chemotherapies that are not the same in chemocare vs SESCD 

ifelse(Prior_chemo_type_comb$change==1,paste(Prior_chemo_type_comb$`Chemotherapy type.x`,Prior_chemo_type_comb$`Chemotherapy type.y`,sep=" "),Prior_chemo_type_comb$`Chemotherapy type.x`) -> Prior_chemo_type_comb$`Chemotherapy type`

ifelse(is.na(Prior_chemo_type_comb$`Chemotherapy type.x`)==1,Prior_chemo_type_comb$`Chemotherapy type.y`,
       ifelse(is.na(Prior_chemo_type_comb$`Chemotherapy type.y`)==1,Prior_chemo_type_comb$`Chemotherapy type.x`,Prior_chemo_type_comb$`Chemotherapy type`))->Prior_chemo_type_comb$`Chemotherapy type`


#Change the "change" variable to 0 if it's NA 

ifelse(is.na(Prior_chemo_type_comb$change)==1,0,Prior_chemo_type_comb$change) -> Prior_chemo_type_comb$change

Prior_chemo_type_comb %>% filter(change==1) %>% select(UPI_NUMBER) %>% n_distinct() ->Prior_chemo_type_change

#Prior_chemo_type_comb$`Chemotherapy type` %>% table(useNA ="always") %>% view()


ifelse(grepl("Anthracycline ",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE & grepl(" Taxane",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE | grepl(" Anthracycline",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE & grepl("Taxane ",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE  ,"Anthracycline + Taxane",
       ifelse(grepl("Anthracycline ",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE | grepl("Anthracycline",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE & grepl("Non",Prior_chemo_type_comb$`Chemotherapy type`)==FALSE,"Anthracycline",
       ifelse(grepl("Taxane ",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE | grepl("Taxane",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE & grepl("Non",Prior_chemo_type_comb$`Chemotherapy type`)==FALSE,"Taxane",
       ifelse(grepl("Non-Anthracycline/Non-Taxane",Prior_chemo_type_comb$`Chemotherapy type`)==TRUE,"Non-Anthracycline/Non-Taxane",
              ifelse(grepl("Combination unknown", Prior_chemo_type_comb$`Chemotherapy type`)==TRUE,"Combination unknown",NA)))))->Prior_chemo_type_comb$`Chemotherapy type`


left_join(Prior_chemo_pats,Prior_chemo_type_comb) ->Prior_chemo_type_comb

Prior_chemo_type_comb %>% filter(is.na(`Chemotherapy type`)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->Prior_chemo_type_comb_NA


Prior_chemo_type_comb %>% group_by(`Chemotherapy type`) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior chemotherapy patients`= round2(`Number of patients`/n_distinct(Prior_chemo_pats$UPI_NUMBER)*100,2))-> Prior_chemo_type_comb_summary

rbind(Prior_chemo_type_comb_summary,c("Total",colSums(Prior_chemo_type_comb_summary[,-1])))-> Prior_chemo_type_comb_summary

Prior Chemotherapy Type - Final

Finally the records from the SESCD and Chemocare are combined, there are 5 (4.95%) patients who have changed prior chemotherapy type after the datasets have combined. There are 2 (1.98%) patients which have a record of prior chemotherapy in SMR06 but type cannot be found in the SESCD or Chemocare.

Prior_chemo_type_comb_summary %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Chemotherapy type Number of patients % of prior chemotherapy patients
Anthracycline 27 26.73
Anthracycline + Taxane 65 64.36
Combination unknown 1 0.99
Non-Anthracycline/Non-Taxane 2 1.98
Taxane 4 3.96
NA 2 1.98
Total 101 100

Prior Hormone Therapy Y/N

#Prior Endocrine therapy Y/N ----
#Use both SMR06, the SESCD, PIS, and Fulvestrant audit
#SMR06

Recurrent_SMR06 %>% filter(is.na(HORM_THERAPY)==1) %>%
 select(UPI_NUMBER,Met_inc_date,DATE_1ST_HORM) %>%
 mutate(timediff=difftime(Met_inc_date,DATE_1ST_HORM,units=c("weeks"))) %>% 
  filter(timediff>0) %>% 
  select(UPI_NUMBER) %>% n_distinct() ->NA_SMR06_HORM_pats

#No 9's, so no coded missing values 
#SMR06 %>% filter(UPI_NUMBER %in% Recurrent_SMR06$UPI_NUMBER,Met_inc_date>DATE_1ST_HORM) %>% 
#  group_by(HORM_THERAPY) %>% 
#  summarise(Number_of_pats=n_distinct(UPI_NUMBER)) 

Recurrent_SMR06 %>% filter(HORM_THERAPY==1) %>%
 select(UPI_NUMBER,Met_inc_date,HORM_THERAPY,DATE_1ST_HORM) %>%
 mutate(timediff=difftime(Met_inc_date,DATE_1ST_HORM,units=c("weeks"))) %>% 
  filter(timediff>0) ->Prior_horm_SMR06 


n_distinct(Prior_horm_SMR06$UPI_NUMBER) ->Prior_horm_SMR06_pats

#SESCD Hormone database
# joining with recurrent bc patients to get hormone therapy treatments occuring before date of metastatic diagnosis. 

left_join(Recurrent_SMR06,Onc_Main_Hormone, by=c("UPI_NUMBER","Met_inc_date")) %>% 
  mutate(timediff_onc=difftime(Met_inc_date,STDATE,units=c("weeks"))) %>% 
  filter(is.na(timediff_onc)==0) %>% 
  filter(timediff_onc>0) %>% 
  select(UPI_NUMBER,Met_inc_date,timediff_onc,STDATE,ENDDATE,METHOD,MATERIAL)-> Prior_horm_Onc


n_distinct(Prior_horm_Onc$UPI_NUMBER)-> Prior_horm_Onc_pats


anti_join(Prior_horm_Onc,Prior_horm_SMR06,by="UPI_NUMBER") %>% select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_Onc_notSMR06_pats

anti_join(Prior_horm_SMR06,Prior_horm_Onc,by="UPI_NUMBER") %>% 
  select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_SMR06_notOnc_pats




#PIS DATA
left_join(Recurrent_SMR06,PIS)  %>% 
  filter(is.na(`APT Date`)==1) %>% 
  select(UPI_NUMBER) %>% 
  n_distinct()-> PIS_Missing_STDATE


left_join(Recurrent_SMR06,PIS) %>% 
  mutate(timediff_PIS=difftime(Met_inc_date,`APT Date`,units=c("weeks"))) %>% 
  filter(is.na(timediff_PIS)==0) %>% 
  filter(timediff_PIS>0) -> Prior_horm_PIS


n_distinct(Prior_horm_PIS$UPI_NUMBER)-> Prior_horm_PIS_pats



anti_join(Prior_horm_PIS,Prior_horm_SMR06,by="UPI_NUMBER") %>% select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_PIS_notSMR06_pats

anti_join(Prior_horm_SMR06,Prior_horm_PIS,by="UPI_NUMBER") %>% 
  select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_SMR06_notPIS_pats

#Patients found in PIS but not Onc database and vice versa

anti_join(Prior_horm_PIS,Prior_horm_Onc,by="UPI_NUMBER") %>% select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_PIS_notOnc_pats

anti_join(Prior_horm_Onc,Prior_horm_PIS,by="UPI_NUMBER") %>% 
  select(UPI_NUMBER) %>% 
  n_distinct() ->Prior_horm_SMR06_notOnc_pats



#Fulvestrant data 

Fulvestrant<-read_excel("Z:/Oncology Data/Cancer LCDI/Fulvestrant Audit/Fulvestrant Only MM.xlsx")

Fulvestrant$`STDATE` %>% as.Date() ->Fulvestrant$`STDATE`


left_join(Recurrent_SMR06,Fulvestrant)  %>% 
  filter(is.na(STDATE)==1) %>% 
  select(UPI_NUMBER) %>% 
  n_distinct()-> Fulvestrant_Missing_STDATE
  

left_join(Recurrent_SMR06,Fulvestrant) %>% 
  mutate(timediff_fulvestrant=difftime(Met_inc_date,STDATE,units=c("weeks"))) %>% 
  filter(is.na(timediff_fulvestrant)==0) %>% 
  filter(timediff_fulvestrant>0) -> Prior_horm_fulvestrant
  
Prior_horm_fulvestrant$UPI_NUMBER %>% n_distinct() -> Prior_horm_fulv_patients
# No patients have a prior record of fulvestrant 



 
#### Joining them up together

full_join(Prior_horm_Onc,Prior_horm_SMR06,by=c("UPI_NUMBER","Met_inc_date")) %>% 
  select(UPI_NUMBER,Met_inc_date) %>% 
  distinct(UPI_NUMBER, .keep_all=TRUE) ->Prior_horm_pats

full_join(Prior_horm_pats,Prior_horm_PIS) %>% 
  select(UPI_NUMBER,Met_inc_date) %>% 
  distinct(UPI_NUMBER, .keep_all=TRUE) ->Prior_horm_pats


Recurrent_SMR06 %>% mutate(`Prior Hormone Therapy` = ifelse(UPI_NUMBER %in% Prior_horm_pats$UPI_NUMBER,"Yes","No")) %>% 
  select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,Treatment_category,CHEMO,DATE_1ST_CHEMO,`Prior Chemotherapy`,HORM_THERAPY,DATE_1ST_HORM, `Prior Hormone Therapy`) ->Recurrent_SMR06

  Recurrent_SMR06 %>% 
  group_by(`Prior Hormone Therapy`) %>% 
  summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of recurrent patients`=round2(`Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2))-> Prior_horm_YN_summary
  
rbind(Prior_horm_YN_summary,c("Total",colSums(Prior_horm_YN_summary[,-1])))-> Prior_horm_YN_summary
    


## Prior Chemo Y/N by first line of treatment 

Recurrent_SMR06 %>% filter(`Prior Hormone Therapy` == "Yes" & `Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% 
  summarise(`Yes_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`Yes_% of recurrent patients`=round2(`Yes_Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2)) -> Prior_horm_Y_summary

Recurrent_SMR06 %>% filter(`Prior Hormone Therapy` == "No" & `Line of Treatment`=="1st") %>% group_by(Treatment_category) %>% 
  summarise(`No_Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`No_% of recurrent patients`=round2(`No_Number of patients`/n_distinct(Recurrent_SMR06$UPI_NUMBER)*100,2)) -> Prior_horm_N_summary

full_join(Prior_horm_Y_summary,Prior_horm_N_summary) %>% 
  rename(`1st line treatment`="Treatment_category")-> Prior_horm_YN_1st_summary


Prior_horm_YN_1st_summary %>% 
  set_names(gsub(".*_","", names(.))) -> Prior_horm_YN_1st_summary 

Prior hormone therapy in recurrent breast cancer patients is derived using the HORM_THERAPY flag in SMR06, hormone therapy records for breast cancer only in the SESCD and PIS occuring before the date of metastatic diagnosis.

There are 0 (0%) patients who have a missing HORM flag from records in SMR06 occuring before the date of metastatic diagnosis.

First recurrent patients are searched for in SMR06 with a HORMONE_THERAPY flag of 1 occuring before the date of metastatic diagnosis. Then the recurrent patients are seached for in the Hormone Therapy table from the SESCD, patients who have a record for hormone therapy before the date of metastatic diagnosis are considered to be prior hormone therapy patients.

There are 156 (89.66%) recurrent patients who were found to have prior hormone therapy using the SMR06 database.

Using the SESCD 153 (87.93%) recurrent patients were found to have a record of prior hormone therapy

In PIS there are 152 (87.36%) patients who have a record of prior hormone therapy.

SMR06 vs SESCD vs PIS

There are 17 (9.77%) patients who were found with a record for prior hormone therapy in PIS that were not found in the SESCD, vice versa there were 17 (9.77%) patients who had a record in the SESCD but not PIS.

There are 13 (7.47%) patients who were found with a record for prior hormone therapy in the SESCD that were not found in SMR06, vice versa there were 18 (10.34%) who had a record for prior hormone therapy in SMR06 but not the SESCD.

There are 13 (7.47%) patients who were found with a record for prior hormone therapy in PIS that were not found in the SMR06, vice verse there were 17 (9.77%) patients who had a record in the SMR06 but not PIS.

The prior hormone therapy patients from SMR06 and the SESCD are combined to give a final Prior Hormone Therapy Y/N flag.

Prior_horm_YN_summary %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Prior Hormone Therapy Number of patients % of recurrent patients
No 3 1.72
Yes 171 98.28
Total 174 100

Prior Hormone Therapy by 1st line of treatment

kable(Prior_horm_YN_1st_summary, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Prior Hormone therapy" = 2, "No Prior Hormone Therapy" = 2))
Prior Hormone therapy
No Prior Hormone Therapy
1st line treatment Number of patients % of recurrent patients Number of patients % of recurrent patients
Chemotherapy 35 20.11 2 1.15
Death 11 6.32 1 0.57
Endocrine 119 68.39 NA NA
No treatment 4 2.30 NA NA
Trial/other 2 1.15 NA NA

Prior Hormone Therapy Type

 #Prior hormone therapy type ----


 Prior_horm_PIS$`ePR DMD Drug Name` %>% as.factor() %>%
   fct_recode("AI"="ANASTROZOLE 1MG TABLETS",
              "AI"="LETROZOLE 2.5MG TABLETS",
              "AI"="EXEMESTANE 25MG TABLETS",
              "TAMOXIFEN" ="TAMOXIFEN 10MG TABLETS",
              "TAMOXIFEN" ="TAMOXIFEN 10MG/5ML ORAL SOLUTION SUGAR FREE",
              "TAMOXIFEN" ="TAMOXIFEN 10MG/5ML ORAL SUSPENSION",
              "TAMOXIFEN" ="TAMOXIFEN 20MG TABLETS") %>% as.character()->Prior_horm_PIS$`ePR DMD Drug Name`

suppressWarnings(Prior_horm_PIS %>% group_by(UPI_NUMBER) %>% 
  mutate(`ePR DMD Drug Name1`=glue_collapse(`ePR DMD Drug Name`,sep=" "))) ->Prior_horm_PIS

 
 
Prior_horm_Onc$`MATERIAL` %>% as.factor() %>%
   fct_recode("AI"="1",
              "Tamoxifen"="71",
              "AI"="82",
              "AI"="83",
              "AI"="85") %>% as.character()->Prior_horm_Onc$`MATERIAL`

suppressWarnings(Prior_horm_Onc %>% group_by(UPI_NUMBER) %>% 
  mutate(MATERIAL1=glue_collapse(MATERIAL,sep=" "))) ->Prior_horm_Onc




full_join(Prior_horm_Onc,Prior_horm_PIS) %>% 
  select(UPI_NUMBER,Met_inc_date,STDATE,MATERIAL1,`APT Date`, `ePR DMD Drug Name1`)-> Prior_hormone_type
 





ifelse(grepl("AI",Prior_hormone_type$MATERIAL1)==TRUE & grepl("Tamoxifen",Prior_hormone_type$MATERIAL1)==TRUE,"AI + Tamoxifen",
       ifelse(grepl("AI",Prior_hormone_type$MATERIAL1)==TRUE & grepl("Tamoxifen",Prior_hormone_type$MATERIAL1)==FALSE ,"AI",
              ifelse(grepl("AI",Prior_hormone_type$MATERIAL1)==FALSE & grepl("Tamoxifen",Prior_hormone_type$MATERIAL1),"Tamoxifen",NA))) -> Prior_hormone_type$MATERIAL


ifelse(grepl("AI",Prior_hormone_type$`ePR DMD Drug Name1`)==TRUE & grepl("TAMOXIFEN",Prior_hormone_type$`ePR DMD Drug Name1`)==TRUE,"AI + Tamoxifen",
       ifelse(grepl("AI",Prior_hormone_type$`ePR DMD Drug Name1`)==TRUE & grepl("TAMOXIFEN",Prior_hormone_type$`ePR DMD Drug Name1`)==FALSE ,"AI",
              ifelse(grepl("AI",Prior_hormone_type$`ePR DMD Drug Name1`)==FALSE & grepl("TAMOXIFEN",Prior_hormone_type$`ePR DMD Drug Name1`),"Tamoxifen",NA))) ->Prior_hormone_type$`ePR DMD Drug Name`
  

#Finding the number of patients who have changed when combining the SESCD dataset with PIS

ifelse(Prior_hormone_type$MATERIAL1==Prior_hormone_type$`ePR DMD Drug Name1`,0,1) ->Prior_hormone_type$change

ifelse(is.na(Prior_hormone_type$change)==1,0,Prior_hormone_type$change) -> Prior_hormone_type$change

Prior_hormone_type %>% filter(change==1) %>% select(UPI_NUMBER) %>% n_distinct() ->Prior_hormone_type_change
 
paste(Prior_hormone_type$`ePR DMD Drug Name`,Prior_hormone_type$MATERIAL,sep=" ") -> Prior_hormone_type$`Prior hormone therapy type`
 

ifelse(grepl("AI + Tamoxifen",Prior_hormone_type$`Prior hormone therapy type`)==TRUE, "AI + Tamoxifen",
       ifelse(grepl("AI",Prior_hormone_type$`Prior hormone therapy type`)==TRUE & grepl("Tamoxifen",Prior_hormone_type$`Prior hormone therapy type`)==TRUE,"AI + Tamoxifen",
       ifelse(grepl("AI",Prior_hormone_type$`Prior hormone therapy type`)==TRUE & grepl("Tamoxifen",Prior_hormone_type$`Prior hormone therapy type`)==FALSE ,"AI",
              ifelse(grepl("AI",Prior_hormone_type$`Prior hormone therapy type`)==FALSE & grepl("Tamoxifen",Prior_hormone_type$`Prior hormone therapy type`),"Tamoxifen",NA)))) ->  Prior_hormone_type$`Prior hormone therapy type`



left_join(Prior_horm_pats,Prior_hormone_type) ->Prior_hormone_type

Prior_hormone_type %>% filter(is.na(`Prior hormone therapy type`)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->Prior_horm_type_NA


Prior_hormone_type %>% group_by(`Prior hormone therapy type`) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior hormone patients`= round2(`Number of patients`/n_distinct(Prior_horm_pats$UPI_NUMBER)*100,2))-> Prior_horm_type_summary

rbind(Prior_horm_type_summary,c("Total",colSums(Prior_horm_type_summary[,-1])))-> Prior_horm_type_summary

Prior hormone therapy type is derived from the SESCD and PIS. The Fulvestrant Audit dataset was analysed but no fulvestrant was found to be prescribed before the date of metastatic diagnosis.

Good quality PIS data is only available from 2011 onwards therefore any hormone therapy prior to that will be found using the SESCD.

Prior hormone therapy type SESCD

Prior Hormone therapy type is derived by classifying the MATERIAL variable into the following categories:

  • AI (Aromataste Inhibitor)
  • Tamoxifen
  • AI + Tamoxifen
Prior_hormone_type %>% group_by(MATERIAL) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior hormone patients ` = round2(`Number of patients`/n_distinct(Prior_horm_pats$UPI_NUMBER)*100,2))  -> Prior_hormone_type_Onc
  
rbind(Prior_hormone_type_Onc,c("Total",colSums(Prior_hormone_type_Onc[,-1])))-> Prior_hormone_type_Onc


Prior_hormone_type_Onc %>% rename("Prior hormone therapy type"="MATERIAL") ->Prior_hormone_type_Onc

Prior_hormone_type_Onc %>%   kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left")
Prior hormone therapy type Number of patients % of prior hormone patients
AI 66 38.6
AI + Tamoxifen 33 19.3
Tamoxifen 54 31.58
NA 18 10.53
Total 171 100.01

Prior hormone therapy type PIS

Prior hormone therapy is derived from an e-Prescribed PIS report, this contains data generated when a prescription is issued from a GP and an e-message is sent instantly to PIS. The data does not determine whether the prescription made it to the pharmacy or not, only if it was prescribed. Again the ePR DMD Drug Name is classified into the following:

  • AI (Aromataste Inhibitor)
  • Tamoxifen
  • AI + Tamoxifen
Prior_hormone_type %>% group_by(`ePR DMD Drug Name`) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of prior hormone patients ` = round2(`Number of patients`/n_distinct(Prior_horm_pats$UPI_NUMBER)*100,2))  -> Prior_hormone_type_PIS
  
rbind(Prior_hormone_type_PIS,c("Total",colSums(Prior_hormone_type_PIS[,-1])))-> Prior_hormone_type_PIS

Prior_hormone_type_PIS %>% rename("Prior hormone therapy type"="ePR DMD Drug Name") ->Prior_hormone_type_PIS


Prior_hormone_type_PIS %>%   kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left")
Prior hormone therapy type Number of patients % of prior hormone patients
AI 77 45.03
AI + Tamoxifen 24 14.04
Tamoxifen 51 29.82
NA 19 11.11
Total 171 100

Prior hormone therapy type final

When the data from the SESCD is combined with the data from PIS 133 (77.78%) patients have changed hormone therapy type.

There is 1 (0.58%) patient who has a record for prior hormone therapy in SMR06 but no record of hormone therapy type from either the SESCD or PIS.

Prior_horm_type_summary %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Prior hormone therapy type Number of patients % of prior hormone patients
AI 68 39.77
AI + Tamoxifen 48 28.07
Tamoxifen 54 31.58
NA 1 0.58
Total 171 100

Time paid from last endocrine therapy to metastatic diagnosis

Prior_hormone_type %>% group_by(UPI_NUMBER) %>% 
  summarise(Met_inc_date=as.Date(max(Met_inc_date)),STDATE=as.Date(max(STDATE)),`APT Date`=as.Date(max(`APT Date`))) -> Prior_hormone_time

#checking to see if there are any SESCD records occuring after the date of metastasis 

Prior_hormone_time %>% mutate(Onc_most_recent=ifelse(STDATE>`APT Date`,1,0)) -> Prior_hormone_time
#There are no records on the SESCD that have a start date greater than the apt date on the PIS extract
#max(Prior_hormone_time$Onc_most_recent)

#Need to replace NA APT date with the date from the SESCD

ifelse(is.na(Prior_hormone_time$`APT Date`)==1,Prior_hormone_time$STDATE,Prior_hormone_time$`APT Date`)->Prior_hormone_time$`Last hormone therapy date`

as.Date(Prior_hormone_time$`Last hormone therapy date`,origin="1970-01-01") ->Prior_hormone_time$`Last hormone therapy date`

#calculating the time difference between the APT date and the met inc date

Prior_hormone_time %>% mutate(`Time to last hormone therapy`= as.numeric(difftime(Met_inc_date,`Last hormone therapy date`,units=c("days")))) ->Prior_hormone_time


Prior_hormone_time %>% filter(is.na(`Time to last hormone therapy`)==TRUE) %>% select(UPI_NUMBER) %>% n_distinct() -> prior_hormone_NAtime_pats

data.frame(`Measure`=c("Minimum","Mean","Median","Maximum") ,
            `Time_to_last_horm_therapy`=c(round2(min(Prior_hormone_time$`Time to last hormone therapy`,na.rm = TRUE),1),
                                                    round2(mean(Prior_hormone_time$`Time to last hormone therapy`,na.rm=TRUE),1),
                                                    round2(median(Prior_hormone_time$`Time to last hormone therapy`,na.rm=TRUE),1),
                                                    round2(max(Prior_hormone_time$`Time to last hormone therapy`,na.rm=TRUE),1))) -> Time_to_last_hormone_therapy_summary

rename(Time_to_last_hormone_therapy_summary,`Time to last hormone therapy (days)` = `Time_to_last_horm_therapy`) ->Time_to_last_hormone_therapy_summary

Time from last prescribed hormone therapy to metastatic diagnosis is derived by taking the most recent start or prescribed date from either the SESCD or PIS and calculating the time difference in days from the date of metastatic diagnosis.

In cases where the patient has a record from both PIS and the SESCD the date from PIS has been more recent that the date from the SESCD. If the date from PIS is missing then the date from the SESCD is used in its place.

Time_to_last_hormone_therapy_summary %>%   kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left")
Measure Time to last hormone therapy (days)
Minimum 1.0
Mean 511.1
Median 49.5
Maximum 6723.0

Patients receiving Goserelin

 #% of patients receing Goserelin
#prior and post metastatic diagnosis 
#Reading in Goserelin data 

read_excel("LCDI_Goserelin_patients_11July2019_V1.xlsx") ->Goserelin_pats

left_join(Cohort,Goserelin_pats, by=c("UPI_NUMBER"="Pat UPI [C]")) -> Goserelin_pats

Goserelin_pats$`APT Date` %>% as.Date() ->Goserelin_pats$`APT Date`

ifelse(is.na(Goserelin_pats$`APT Date`)==0,"Yes","No")->Goserelin_pats$`Goserelin received`

Goserelin_pats %>% mutate(timediff_goserelin=difftime(Met_inc_date,`APT Date`,units=c("weeks"))) -> Goserelin_pats

Goserelin_pats %>% group_by(`Goserelin received`) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of advanced breast cancer patients` = round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,2)) ->Goserelin_summary

rbind(Goserelin_summary,c("Total",colSums(Goserelin_summary[,-1])))-> Goserelin_summary

Patients receiving Goserelin is dervied by using e-Prescribed PIS data which covers the time period 2011 to 2017. If a patient has any record of Goserelin prescriptions they are flagged as having receieved Goserelin.

Goserelin_summary %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Goserelin received Number of patients % of advanced breast cancer patients
No 246 88.81
Yes 31 11.19
Total 277 100

Goserelin before or after metastatic date

Goserelin_pats %>% select(-`Goserelin received`) %>% mutate(`Goserelin received`= ifelse(timediff_goserelin>0,"Before date of metastasis",
                                                                                         ifelse(timediff_goserelin<=0,"After date of metastasis",NA))) ->Goserelin_pats


Goserelin_pats %>% group_by(`Goserelin received`) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of advanced breast cancer patients` = round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,2)) ->Goserelin_summary_before_after

rbind(Goserelin_summary_before_after,c("Total",colSums(Goserelin_summary_before_after[,-1])))-> Goserelin_summary_before_after

Goserelin_summary_before_after %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Goserelin received Number of patients % of advanced breast cancer patients
After date of metastasis 26 9.39
Before date of metastasis 9 3.25
NA 246 88.81
Total 281 101.45

Charlson Index

#Charlson Index ----
#SMR01A

# 
# SMR01A_colpos <- read_excel("Z:/Oncology Data/Cancer LCDI/SMR/SMR01A col names pos Charlson.xlsx")
# 
# SMR01A_from <- SMR01A_colpos$Position_From
# SMR01A_to<-SMR01A_colpos$Position_To
# SMR01A_names<-SMR01A_colpos$Field_name
# 
# 
# SMR01A <- read_fwf("N:/Upload/Linked Data/Raw Data/S_mrl_smr01a.dat", fwf_positions(SMR01A_from,SMR01A_to,
#                                                                                   col_names = SMR01A_names),guess_max = 200000)
# 
# 
# SMR01A %>% filter(UPI_NUMBER %in% Cohort$UPI_NUMBER)->SMR01A
# 
# 
# #SMR01B
# 
# SMR01B_colpos <- read_excel("Z:/Oncology Data/Cancer LCDI/SMR/SMR01B col names pos Charlson.xlsx")
# 
# SMR01B_from <- SMR01B_colpos$Position_From
# SMR01B_to<-SMR01B_colpos$Position_To
# SMR01B_names<-SMR01B_colpos$Field_name
# 
# 
# SMR01B <- read_fwf("N:/Upload/Linked Data/Raw Data/S_mrl_smr01b.dat", fwf_positions(SMR01B_from,SMR01B_to,
#                                                                                     col_names = SMR01B_names),guess_max = 200000)
# SMR01B %>% filter(UPI_NUMBER %in% Cohort$UPI_NUMBER) ->SMR01B
# 
# SMR01<- rbind(SMR01A,SMR01B)
# 
# rm(SMR01A,SMR01B)
# 
# 
# SMR01$DISCHARGE_DATE<-ymd(SMR01$DISCHARGE_DATE)
# SMR01$Discharge_year <- year(SMR01$DISCHARGE_DATE)
# 
# SMR01 %>% filter(SMR01$Discharge_year>=2009) ->SMR01
# 
# cbind(RECTYPE="SMR01",SMR01) ->SMR01
# 
# SMR01 %>% write_xlsx("SMR01.xlsx")


SMR01<- read_xlsx("SMR01.xlsx")




####Charlson base ----  

Charlson_base <- SMR01
  
Charlson_base<-left_join(Cohort,Charlson_base,by="UPI_NUMBER")

Charlson_base <- arrange(Charlson_base,UPI_NUMBER,Met_inc_date)


Charlson_base <- arrange(Charlson_base,UPI_NUMBER,Met_inc_date,DISCHARGE_DATE)

year(Charlson_base$Met_inc_date) -> Charlson_base$Met_inc_year


#removing "-" from ICD9

Charlson_base$MAIN_CONDITION %>% str_replace("-", "")->Charlson_base$MAIN_CONDITION
Charlson_base$OTHER_CONDITION_1 %>% str_replace("-", "")->Charlson_base$OTHER_CONDITION_1
Charlson_base$OTHER_CONDITION_2 %>% str_replace("-", "")->Charlson_base$OTHER_CONDITION_2
Charlson_base$OTHER_CONDITION_3 %>% str_replace("-", "")->Charlson_base$OTHER_CONDITION_3
Charlson_base$OTHER_CONDITION_4 %>% str_replace("-", "")->Charlson_base$OTHER_CONDITION_4
Charlson_base$OTHER_CONDITION_5 %>% str_replace("-", "")->Charlson_base$OTHER_CONDITION_5




#Calculating date difference to create two datastes for the different types of Charlson.

Charlson_base$date_diff<- as.numeric(difftime(Charlson_base$Met_inc_date,Charlson_base$DISCHARGE_DATE),unit="weeks")/52.25 
Charlson_base$date_diff_months<-as.numeric(difftime(Charlson_base$Met_inc_date,Charlson_base$DISCHARGE_DATE),unit="weeks")/4.345

Charlson_base_Quan <-Charlson_base

#Charlson method Original 1987 weights ----
#1987 weights 
#Myocardial_infarction
#ICD10
Charlson_base$Myocardial_infar_flag =0


ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("I21","I22")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("I21","I22")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("I21","I22")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("I21","I22")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("I21","I22")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("I21","I22")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("I252"),1,Charlson_base$Myocardial_infar_flag) ->Charlson_base$Myocardial_infar_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("410","412"),1,Charlson_base$Myocardial_infar_flag) -> Charlson_base$Myocardial_infar_flag

#Congestive Heart Failure
#ICD10

Charlson_base$Cong_heart_fail_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("I43","I50")|substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("I43","I50")|substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("I43","I50")|substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("I43","I50")|substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("I43","I50")|substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("I43","I50")|substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$MAIN_CONDITION,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$OTHER_CONDITION_1,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$OTHER_CONDITION_2,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$OTHER_CONDITION_3,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$OTHER_CONDITION_4,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base$OTHER_CONDITION_5,1, 5) %in% c("428"),1,Charlson_base$Cong_heart_fail_flag) ->Charlson_base$Cong_heart_fail_flag

#Peripheral Vascular Disease 
#ICD10

Charlson_base$Periph_vasc_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("I70","I71")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("I70","I71")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("I70","I71")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("I70","I71")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("I70","I71")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("I70","I71")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("I731","I738","I739","I771","I790","I792","K551","K558","K559","V434","Z958","Z959"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("0930","4373","4431","4432","4438","4439","4471","5571","5579")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("440","441"),1,Charlson_base$Periph_vasc_flag) ->Charlson_base$Periph_vasc_flag


#Cerebrovasc_flag
#ICD10

Charlson_base$Cerebrovasc_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("G45","G46","I60","I61","I62","I63","I64","I65","I66","I67","I68","I69")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("H340"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 5) %in% c("36234")| substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 5) %in% c("36234")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 5) %in% c("36234")| substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 5) %in% c("36234")| substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 5) %in% c("36234")| substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 5) %in% c("36234")| substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("430","431","432","433","434","435","436","437","438"),1,Charlson_base$Cerebrovasc_flag) ->Charlson_base$Cerebrovasc_flag

#Dementia
#ICD10
Charlson_base$Dementia_flag=0
  
ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("F051","G311"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag

#ICD09


ifelse(substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("290")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,3) %in% c("290")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,3) %in% c("290")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,3) %in% c("290")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,3) %in% c("290")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,3) %in% c("290")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("2941","3312"),1,Charlson_base$Dementia_flag) ->Charlson_base$Dementia_flag

#Chronic Pulmonary Disease
#ICD10

Charlson_base$Pulmonary_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base$Pulmonary_flag) ->Charlson_base$Pulmonary_flag

#Connective tissue disease - rheumatic disease
#ICD10

Charlson_base$Con_tiss_disease_rheum_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag

#ICD09


ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("725"),1,Charlson_base$Con_tiss_disease_rheum_flag) ->Charlson_base$Con_tiss_disease_rheum_flag


#Peptic Ucler Diseasse
#ICD10

Charlson_base$Peptic_ulcer_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("K25","K26","K27","K28"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag

#ICD09
ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("531","532","533","534"),1,Charlson_base$Peptic_ulcer_flag) ->Charlson_base$Peptic_ulcer_flag

#Diabetes - max weight from without/with complications will be used
#Diabetes without complications

Charlson_base$Diabetes_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("E100","E101","E106","E108","E109","E110","E111","E116","E118","E119","E120","E121","E126","E128","E129","E130","E131","E136","E138","E139","E140","E141","E146","E148","E149"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag


#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("2500","2501","2502","2503","2508","2509"),1,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag

#Diabetes with complications 
#ICD10

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("2504","2505","2506","2507"),2,Charlson_base$Diabetes_flag) ->Charlson_base$Diabetes_flag


#Paraplegia and Hemiplegia 
#ICD10
Charlson_base$Para_hemiplegia_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("G81","G82")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("G81","G82")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("G81","G82")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("G81","G82")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("G81","G82")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("G81","G82")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag

#ICD09
ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("342","343"),2,Charlson_base$Para_hemiplegia_flag) ->Charlson_base$Para_hemiplegia_flag


#Renal Disease 
#ICD10
Charlson_base$Renal_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("N18","N19")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("N18","N19")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("N18","N19")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("N18","N19")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("N18","N19")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("N18","N19")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("582","585","586","V56"),2,Charlson_base$Renal_flag) ->Charlson_base$Renal_flag

#Liver disease 
#Max weight of mild and moderate/severe will be used
#Mild liver disease

#ICD10

Charlson_base$Liver_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag

#ICD09
ifelse(substring(Charlson_base$MAIN_CONDITION,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$MAIN_CONDITION,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$MAIN_CONDITION,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$OTHER_CONDITION_1,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$OTHER_CONDITION_1,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$OTHER_CONDITION_2,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$OTHER_CONDITION_2,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$OTHER_CONDITION_3,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$OTHER_CONDITION_3,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$OTHER_CONDITION_4,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$OTHER_CONDITION_4,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base$OTHER_CONDITION_5,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base$OTHER_CONDITION_5,1,3) %in% c("570","571") ,1,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag

#Moderate or severe liver disease

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag

#ICD09

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),3,Charlson_base$Liver_flag) ->Charlson_base$Liver_flag

#AIDS/HIV
#ICD10

Charlson_base$AIDS_HIV_flag=0

ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("B20","B21","B22","B24"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag

#ICD09
ifelse(substring(Charlson_base$MAIN_CONDITION,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_1,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_2,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_3,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_4,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag
ifelse(substring(Charlson_base$OTHER_CONDITION_5,1, 3) %in% c("042","043","044"),6,Charlson_base$AIDS_HIV_flag) ->Charlson_base$AIDS_HIV_flag



Charlson_base %>% 
  filter(date_diff >0 & date_diff<=5 )-> Charlson_base


#Aggregating
Charlson_base %>% 
  group_by(UPI_NUMBER,Met_inc_date,Met_inc_year) %>% 
  summarise(Myocardial_infar_flag=max(Myocardial_infar_flag),
            Cong_heart_fail_flag=max(Cong_heart_fail_flag),
            Periph_vasc_flag=max(Periph_vasc_flag),
            Cerebrovasc_flag=max(Cerebrovasc_flag),
            Dementia_flag=max(Dementia_flag),
            Pulmonary_flag=max(Pulmonary_flag),
            Con_tiss_disease_rheum_flag=max(Con_tiss_disease_rheum_flag),
            Peptic_ulcer_flag=max(Peptic_ulcer_flag),
            Diabetes_flag=max(Diabetes_flag),
            Para_hemiplegia_flag=max(Para_hemiplegia_flag),
            Renal_flag=max(Renal_flag),
            Liver_flag=max(Liver_flag),
            AIDS_HIV_flag=max(AIDS_HIV_flag)) %>% 
  ungroup()->Charlson_1

Charlson_1 %>% 
  summarise(number_unique_pats=n_distinct(UPI_NUMBER))->Charlson_2

Charlson_1$Charlson_score <- Charlson_1$Myocardial_infar_flag + Charlson_1$Cong_heart_fail_flag + Charlson_1$Periph_vasc_flag +
  Charlson_1$Cerebrovasc_flag + Charlson_1$Dementia_flag + Charlson_1$Pulmonary_flag + Charlson_1$Con_tiss_disease_rheum_flag +
  Charlson_1$Peptic_ulcer_flag + Charlson_1$Diabetes_flag + Charlson_1$Para_hemiplegia_flag + Charlson_1$Renal_flag +
  Charlson_1$Liver_flag + Charlson_1$Liver_flag + Charlson_1$AIDS_HIV_flag 




Charlson_1 %>% 
  filter(Myocardial_infar_flag>0) %>% 
  summarise(`Myocardial infarction`=n_distinct(UPI_NUMBER))->Charlson_a


Charlson_1 %>% 
  filter(Cong_heart_fail_flag>0) %>% 
  summarise(`Congestive heart failure`=n_distinct(UPI_NUMBER))->Charlson_b


Charlson_1 %>% 
  filter(Periph_vasc_flag>0) %>% 
  summarise(`Peripheral vascular disease`=n_distinct(UPI_NUMBER))->Charlson_c


Charlson_1 %>% 
  filter(Cerebrovasc_flag>0) %>% 
  summarise(`Cerebrovascular disease`=n_distinct(UPI_NUMBER))->Charlson_d


Charlson_1 %>% 
  filter(Dementia_flag>0) %>% 
  summarise(`Dementia`=n_distinct(UPI_NUMBER))->Charlson_e


Charlson_1 %>% 
  filter(Pulmonary_flag>0) %>% 
  summarise(`Chronic pulmonary disease`=n_distinct(UPI_NUMBER))->Charlson_f


Charlson_1 %>% 
  filter(Con_tiss_disease_rheum_flag>0) %>% 
  summarise(`Connective tissue/rheumatologic disease`=n_distinct(UPI_NUMBER))->Charlson_g


Charlson_1 %>% 
  filter(Peptic_ulcer_flag>0) %>% 
  summarise(`Peptic ulcer`=n_distinct(UPI_NUMBER))->Charlson_h


Charlson_1 %>% 
  filter(Diabetes_flag>0) %>% 
  summarise(`Diabetes`=n_distinct(UPI_NUMBER))->Charlson_i


Charlson_1 %>% 
  filter(Para_hemiplegia_flag>0) %>% 
  summarise(`Hemiplegia/Paraplegia`=n_distinct(UPI_NUMBER))->Charlson_j


Charlson_1 %>% 
  filter(Renal_flag>0) %>% 
  summarise(`Renal disease`=n_distinct(UPI_NUMBER))->Charlson_k


Charlson_1 %>% 
  filter(Liver_flag>0) %>% 
  summarise(`Liver disease`=n_distinct(UPI_NUMBER))->Charlson_l


Charlson_1 %>% 
  filter(AIDS_HIV_flag>0) %>% 
  summarise(`HIV/AIDS`=n_distinct(UPI_NUMBER))->Charlson_m

           


#Mean Charlson score

Charlson_1 %>% 
  summarise(`Mean_allpats` = round2(mean(Charlson_score),1)) -> Charlson_1987_average

Charlson_1 %>% 
  filter(Charlson_score>0) %>% 
  summarise(`Mean_gt0` = round2(mean(Charlson_score),1)) -> Charlson_1987_average_1


cbind(Charlson_a,Charlson_b,Charlson_c,Charlson_d,Charlson_e,Charlson_f,Charlson_g,Charlson_h,Charlson_i,Charlson_j,Charlson_k,Charlson_l,
      Charlson_m) -> Charlson_1987


Charlson_1987[is.na(Charlson_1987)] <- 0 

gather(Charlson_1987) -> Charlson_1987

round2(Charlson_1987$value,1)->Charlson_1987$value

rename(Charlson_1987,`Condition`=`key`,`Number of patients`=`value`) -> Charlson_1987

Charlson_1987 %>% mutate(`% of advanced breast cancer patients`=round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,1)) ->Charlson_1987

The following table shows the number of patients who have had a condition under the original Charlson 1987 methods in the a five year time period before date of metastatic diagnosis.

The mean Charlson 1987 score of the cohort is 0.4. However the mean score for patients with a Charlson comorbidity score >0 is 2

Charlson_1987 %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
  footnote("Please note that patients can have more than one condition")
Condition Number of patients % of advanced breast cancer patients
Myocardial infarction 2 0.7
Congestive heart failure 8 2.9
Peripheral vascular disease 2 0.7
Cerebrovascular disease 3 1.1
Dementia 5 1.8
Chronic pulmonary disease 13 4.7
Connective tissue/rheumatologic disease 4 1.4
Peptic ulcer 2 0.7
Diabetes 9 3.2
Hemiplegia/Paraplegia 2 0.7
Renal disease 4 1.4
Liver disease 8 2.9
HIV/AIDS 0 0.0
Note:
Please note that patients can have more than one condition
#Quan weights 2011 ---- 

Charlson_base_Quan$Cong_heart_Fail_flag =0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_Fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("I43","I50")|substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","P290"),2,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag

#ICD09

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$MAIN_CONDITION,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$MAIN_CONDITION,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$OTHER_CONDITION_1,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$OTHER_CONDITION_1,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$OTHER_CONDITION_2,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$OTHER_CONDITION_2,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$OTHER_CONDITION_3,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$OTHER_CONDITION_3,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$OTHER_CONDITION_4,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$OTHER_CONDITION_4,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 5) %in% c("39891","40201","40211","40291","40401","40403","40411","40413","40491","40493")|substring(Charlson_base_Quan$OTHER_CONDITION_5,2, 4) %in% c("4254","4255","4257","4258","4259")|substring(Charlson_base_Quan$OTHER_CONDITION_5,2, 5) %in% c("428"),1,Charlson_base_Quan$Cong_heart_fail_flag) ->Charlson_base_Quan$Cong_heart_fail_flag


#Dementia
#ICD10
Charlson_base_Quan$Dementia_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("F00","F01","F02","F03","G30")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("F051","G311"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag

#ICD09


ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("290")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,3) %in% c("290")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,3) %in% c("290")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,3) %in% c("290")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,3) %in% c("290")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,3) %in% c("290")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("2941","3312"),2,Charlson_base_Quan$Dementia_flag) ->Charlson_base_Quan$Dementia_flag


#Chronic Pulmonary Disease
#ICD10

Charlson_base_Quan$Pulmonary_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("I278","I279","J684","J701","J703"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag

#ICD09

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("4168","4169","5064","5081","5088")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,3) %in% c("490","491","492","493","494","495","496","500","501","502","503","504","505"),1,Charlson_base_Quan$Pulmonary_flag) ->Charlson_base_Quan$Pulmonary_flag

#Connective tissue disease
Charlson_base_Quan$Con_tiss_disease_rheum_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("M05","M32","M33","M34","M06")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("M315","M351","M353","M360"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag

#ICD09


ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("4465","7100","7101","7102","7103","7104","7140","7141","7142","7148")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,3) %in% c("725"),1,Charlson_base_Quan$Con_tiss_disease_rheum_flag) ->Charlson_base_Quan$Con_tiss_disease_rheum_flag



#Diabetes with complications 
#ICD10

Charlson_base_Quan$Diabetes_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("E102","E103","E104","E105","E107","E112","E113","E114","E115","E117","E122","E123","E124","E125","E127","E132","E133","E134","E135","E137","E142","E143","E144","E145","E147"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag

#ICD09

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("2504","2505","2506","2507"),1,Charlson_base_Quan$Diabetes_flag) ->Charlson_base_Quan$Diabetes_flag

#Paraplegia and Hemiplegia

Charlson_base_Quan$Para_hemiplegia_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("G81","G82")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("G041","G114","G801","G802","G830","G831","G832","G833","G834","G839"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag

#ICD09
ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("3341","3440","3441","3442","3443","3444","3445","3446","3449")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,3) %in% c("342","343"),2,Charlson_base_Quan$Para_hemiplegia_flag) ->Charlson_base_Quan$Para_hemiplegia_flag

#Renal Disease 
#ICD10
Charlson_base_Quan$Renal_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("N18","N19")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("N052","N053","N054","N055","N056","N057","N250","I120","I131","N032","N033","N034","N035","N036","N037","Z490","Z491","Z492","Z940","Z992"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag

#ICD09

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$OTHER_CONDITION_1,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$OTHER_CONDITION_2,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$OTHER_CONDITION_3,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$OTHER_CONDITION_4,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 5) %in% c("40301","40311","40391","40402","40403","40412","40413","40492","40493")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("5830","5831","5832","5834","5836","5837","5880","V420","V451") | substring(Charlson_base_Quan$OTHER_CONDITION_5,1,3) %in% c("582","585","586","V56"),1,Charlson_base_Quan$Renal_flag) ->Charlson_base_Quan$Renal_flag


#Liver disease 
#Max weight of mild and moderate/severe will be used
#Mild liver disease

#ICD10

Charlson_base_Quan$Liver_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("B18","K73","K74")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("K700","K701","K702","K703","K709","K717","K713","K714","K715","K760","K762","K763","K764","K768","K769","V427","Z944"),2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag

#ICD09
ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$MAIN_CONDITION,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$MAIN_CONDITION,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$OTHER_CONDITION_1,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$OTHER_CONDITION_1,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$OTHER_CONDITION_2,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$OTHER_CONDITION_2,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$OTHER_CONDITION_3,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$OTHER_CONDITION_3,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$OTHER_CONDITION_4,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$OTHER_CONDITION_4,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1,5) %in% c("07022","07023","07032","07033","07044","07054")| substring(Charlson_base_Quan$OTHER_CONDITION_5,1,4) %in% c("0706","0709","5733","5734","5738","5739") |substring(Charlson_base_Quan$OTHER_CONDITION_5,1,3) %in% c("570","571") ,2,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag

#Moderate or severe liver disease

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("K704","K711","K721","K729","K765","K766","K767","I850","I859","I864","I982"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag

#ICD09

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 4) %in% c("4560","4561","4562","5722","5723","5724","5728"),4,Charlson_base_Quan$Liver_flag) ->Charlson_base_Quan$Liver_flag

#AIDS/HIV
#ICD10

Charlson_base_Quan$AIDS_HIV_flag=0

ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("B20","B21","B22","B24"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag

#ICD09
ifelse(substring(Charlson_base_Quan$MAIN_CONDITION,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_1,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_2,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_3,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_4,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag
ifelse(substring(Charlson_base_Quan$OTHER_CONDITION_5,1, 3) %in% c("042","043","044"),4,Charlson_base_Quan$AIDS_HIV_flag) ->Charlson_base_Quan$AIDS_HIV_flag

#No Cancer flag. 

#Aggregating Quan 2011 ---- 

Charlson_base_Quan %>%
  filter(date_diff >0 & date_diff<5 )-> Charlson_base_Quan

#Aggregating
Charlson_base_Quan %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(Cong_heart_fail_flag=max(Cong_heart_fail_flag),
            Dementia_flag=max(Dementia_flag),
            Pulmonary_flag=max(Pulmonary_flag),
            Con_tiss_disease_rheum_flag=max(Con_tiss_disease_rheum_flag),
            Diabetes_flag=max(Diabetes_flag),
            Para_hemiplegia_flag=max(Para_hemiplegia_flag),
            Renal_flag=max(Renal_flag),
            Liver_flag=max(Liver_flag),
            AIDS_HIV_flag=max(AIDS_HIV_flag))->Charlson_Quan_1

Charlson_Quan_1 %>% 
  summarise(number_unique_pats=n_distinct(UPI_NUMBER))->Charlson_Quan_2

Charlson_Quan_1$Charlson_score <-  Charlson_Quan_1$Cong_heart_fail_flag +  Charlson_Quan_1$Dementia_flag + Charlson_Quan_1$Pulmonary_flag + Charlson_Quan_1$Con_tiss_disease_rheum_flag +
  Charlson_Quan_1$Diabetes_flag + Charlson_Quan_1$Para_hemiplegia_flag + Charlson_Quan_1$Renal_flag +
  Charlson_Quan_1$Liver_flag + Charlson_Quan_1$Liver_flag + Charlson_Quan_1$AIDS_HIV_flag


Charlson_Quan_1 %>% 
  filter(Cong_heart_fail_flag>0) %>% 
  summarise(`Congestive heart failure`=n_distinct(UPI_NUMBER))->Charlson_Quan_a


Charlson_Quan_1 %>% 
  filter(Dementia_flag>0) %>% 
  summarise(`Dementia`=n_distinct(UPI_NUMBER))->Charlson_Quan_b


Charlson_Quan_1 %>% 
  filter(Pulmonary_flag>0) %>% 
  summarise(`Chronic pulmonary disease`=n_distinct(UPI_NUMBER))->Charlson_Quan_c


Charlson_Quan_1 %>% 
  filter(Con_tiss_disease_rheum_flag>0) %>% 
  summarise(`Connective tissue/rheumatologic disease`=n_distinct(UPI_NUMBER))->Charlson_Quan_d


Charlson_Quan_1 %>% 
  filter(Diabetes_flag>0) %>% 
  summarise(`Diabetes`=n_distinct(UPI_NUMBER))->Charlson_Quan_e


Charlson_Quan_1 %>% 
  filter(Para_hemiplegia_flag>0) %>% 
  summarise(`Hemiplegia/Paraplegia`=n_distinct(UPI_NUMBER))->Charlson_Quan_f


Charlson_Quan_1 %>% 
  filter(Renal_flag>0) %>% 
  summarise(`Renal disease`=n_distinct(UPI_NUMBER))->Charlson_Quan_g


Charlson_Quan_1 %>% 
  filter(Liver_flag>0) %>% 
  summarise(`Liver disease`=n_distinct(UPI_NUMBER))->Charlson_Quan_h


Charlson_Quan_1 %>% 
  filter(AIDS_HIV_flag>0) %>% 
  summarise(`HIV/AIDS`=n_distinct(UPI_NUMBER))->Charlson_Quan_i


#Mean Charlson score

Charlson_Quan_1 %>% 
  summarise(`Mean_allpats` = round2(mean(Charlson_score),1)) -> Charlson_Quan_2011_average

Charlson_Quan_1 %>% 
  filter(Charlson_score>0) %>% 
  summarise(`Mean_gt0` = round2(mean(Charlson_score),1)) -> Charlson_Quan_2011_average_1


cbind(Charlson_Quan_a,Charlson_Quan_b,Charlson_Quan_c,Charlson_Quan_d,Charlson_Quan_e,Charlson_Quan_f,Charlson_Quan_g,
      Charlson_Quan_h,Charlson_Quan_i)->Charlson_Quan

Charlson_Quan[is.na(Charlson_Quan)] <- 0 

gather(Charlson_Quan) -> Charlson_Quan

round2(Charlson_Quan$value,1)->Charlson_Quan$value

rename(Charlson_Quan,`Condition`=`key`,`Number of patients`=`value`) -> Charlson_Quan

Charlson_Quan %>% mutate(`% of advanced breast cancer patients`=round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,1)) ->Charlson_Quan

The following table shows the number of patients who have had a condition under the Charlson Quan 2011 methods in the a five year time period before date of metastatic diagnosis.

The mean Charlson Quan 2011 score of the cohort is 0.4. However the mean score for patients with a Charlson comorbidity score >0 is 2.6

Charlson_Quan %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>% 
    footnote("Please note that patients can have more than one condition")
Condition Number of patients % of advanced breast cancer patients
Congestive heart failure 8 2.9
Dementia 5 1.8
Chronic pulmonary disease 13 4.7
Connective tissue/rheumatologic disease 4 1.4
Diabetes 0 0.0
Hemiplegia/Paraplegia 2 0.7
Renal disease 4 1.4
Liver disease 8 2.9
HIV/AIDS 0 0.0
Note:
Please note that patients can have more than one condition
#Charlson Quan Barplot Scores

Charlson_Quan_1 %>% group_by(Charlson_score) %>% 
  summarise(`Number of patients` = n_distinct(UPI_NUMBER))->Charlson_quan_plotdata

  plot_ly(x=Charlson_quan_plotdata$Charlson_score,y=Charlson_quan_plotdata$`Number of patients`, type="bar", hoverinfo = 'text', marker = list(color = 'rgb(158,202,225)',
                      line = list(color = 'rgb(8,48,107)',
                                  width = 1.5)),
          text = ~paste('Charlson Score:',Charlson_quan_plotdata$Charlson_score,
                        '<br> Number of Patients:', Charlson_quan_plotdata$`Number of patients`)) %>%
  layout(title = "Charlson Score (Quan) of metastatic breast cancer patients diagnosed between 2013 and 2017")

Performance Status

#Reading in a dataset containing the first line of treatment for the cohort and the date for said treatment
read_xlsx("1st_line_treatment.xlsx")-> first_line_treat

first_line_treat %>% filter(UPI_NUMBER %in% Cohort$UPI_NUMBER) -> first_line_treat

#reading in the chemotherapy performance status datasets for 2013-2017

read_xlsx("Z:/Oncology Data/Cancer LCDI/Chemocare/ActivityReport_24-07-2019 2013-2015.xlsx") ->Performance_Status_1
read_xlsx("Z:/Oncology Data/Cancer LCDI/Chemocare/ActivityReport_24-07-2019 2016-2017.xlsx") ->Performance_Status_2

rbind(Performance_Status_1,Performance_Status_2) ->Performance_Status_Chemocare
rm(Performance_Status_1,Performance_Status_2)

left_join(first_line_treat,Performance_Status_Chemocare,by=c("UPI_NUMBER"="CHI")) -> Performance_Status_Chemocare

Performance_Status_Chemocare$`Episode First Cycle Date` %>%  as.character() %>%  as.Date("%d-%m-%Y") -> Performance_Status_Chemocare$`Episode First Cycle Date`


Performance_Status_Chemocare %>% mutate(timediff_PS_metincdate = difftime(Met_inc_date,`Episode First Cycle Date`,units=c("days")),
                                        timediff_PS_firstline = difftime(first_STDATE,`Episode First Cycle Date`,units=c("days"))) -> Performance_Status_Chemocare

#Number of patients who have a chemotherapy record but no performance status recorded
Performance_Status_Chemocare %>% filter(is.na(`Performance Status`)==1 & is.na(`Episode First Cycle Date`)==0) %>% select(UPI_NUMBER) %>% 
  n_distinct() -> Perf_stat_missing_pats

#Number of patients who do not have a record in chemocare
Performance_Status_Chemocare %>% filter(is.na(`Performance Status`)==1 &  is.na(`Episode First Cycle Date`)==1) %>% select(UPI_NUMBER) %>% 
  n_distinct() -> Perf_stat_chemocare_missing_pats



#removing records where the there is a chemocare record but the performance status is missing

Performance_Status_Chemocare %>% filter(is.na(`Performance Status`)==0) -> Performance_Status_Chemocare



Performance_Status_Chemocare %>% mutate(Perfstat_metinc_30days = ifelse(timediff_PS_metincdate>=-30 & timediff_PS_metincdate<=30,1,0),
                                        Perfstat_firstline_30daysprior = ifelse(timediff_PS_firstline>=-30 & timediff_PS_firstline<0,1,0)) -> Performance_Status_Chemocare

#No patient has two different perf stats 


Performance_Status_Chemocare %>% group_by(UPI_NUMBER) %>%
  filter(Perfstat_metinc_30days==1) %>%
  mutate(closest_to_metincdate = ifelse(abs(timediff_PS_metincdate)==min(abs(timediff_PS_metincdate)),1,0)) %>%
  filter(closest_to_metincdate==1) %>% 
  full_join(Cohort, by=c("UPI_NUMBER"="UPI_NUMBER")) %>% 
  group_by(`Performance Status`) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of advanced breast cancer patients`= round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,1))-> Perfstat_metincdate_summary


#perfstat closest to first line of treatment


#No patient has two different perf stats 


Performance_Status_Chemocare %>% group_by(UPI_NUMBER) %>%
  filter(Perfstat_firstline_30daysprior==1) %>%
  mutate(closest_to_firstline = ifelse(abs(timediff_PS_firstline)==min(abs(timediff_PS_firstline)),1,0)) %>%
  filter(closest_to_firstline==1) %>% 
  full_join(Cohort, by=c("UPI_NUMBER"="UPI_NUMBER")) %>%
  group_by(`Performance Status`) %>% 
  summarise(`Number of patients`= n_distinct(UPI_NUMBER)) %>% 
mutate(`% of advanced breast cancer patients`= round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,1))-> Perfstat_firstline_summary

The variable Performance Status is used from the Activity report in Chemocare, please note that this will only be availble for patients who have received Chemotherapy from late 2012 onwards.

The patient’s performance score at the time of the referral (start of episode), not appointment booking or attendance. Therfore the Episode First Cycle Date is used as the date the performance status was recorded on.

Performance status at metastatic diagnosis

The time difference between the Metastatic diagnosis date and the Episode First Cycle Date is calculated. Performance status with the smallest time difference within +/- three months of the metastatic incidence date is taken.

Perfstat_metincdate_summary$`Performance Status` %>% 
  as.factor() %>% 
  fct_recode("Normal activities" = "000",
             "Only light work" = "001",
             "Unable to work, self caring, up to >50% of day"="002",
             "In bed/chair >50% of day"="003",
             "Disabled, in bed/chair all day"="004") -> Perfstat_metincdate_summary$`Performance Status`

Perfstat_metincdate_summary %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left")
Performance Status Number of patients % of advanced breast cancer patients
Normal activities 15 5.4
Only light work 24 8.7
Unable to work, self caring, up to >50% of day 8 2.9
In bed/chair >50% of day 2 0.7
NA 228 82.3

Performance status at first line of treatment

The time difference between the start date of the first line of treatment and the Episode First Cycle Date is calculated. Performance status with the smallest time difference up to three months before the first line of treatment is taken.

Perfstat_firstline_summary$`Performance Status` %>% 
  as.factor() %>% 
  fct_recode("Normal activities" = "000",
             "Only light work" = "001",
             "Unable to work, self caring, up to >50% of day"="002",
             "In bed/chair >50% of day"="003",
             "Disabled, in bed/chair all day"="004") -> Perfstat_firstline_summary$`Performance Status`


Perfstat_firstline_summary %>%  kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Performance Status Number of patients % of advanced breast cancer patients
Normal activities 7 2.5
Only light work 7 2.5
Unable to work, self caring, up to >50% of day 4 1.4
NA 259 93.5

SIMD

### ADDING SIMD - downloaded from http://www.isdscotland.org/Products-and-Services/GPD-Support/Deprivation/SIMD/

SIMD2016<-read_csv("Z:/Oncology Data/Cancer LCDI/SIMD/postcode_2017_2_simd2016.csv", guess_max = 150000)
SIMD2012<-read_csv("Z:/Oncology Data/Cancer LCDI/SIMD/postcode_2016_1_simd2012.csv", guess_max = 150000)
SIMD2009<-read_csv("Z:/Oncology Data/Cancer LCDI/SIMD/postcode_2012_2_simd2009v2.csv", guess_max = 150000)
SIMD2006<-read_csv("Z:/Oncology Data/Cancer LCDI/SIMD/postcode_2009_2_simd2006.csv", guess_max = 150000)
SIMD2004<-read_csv("Z:/Oncology Data/Cancer LCDI/SIMD/postcode_2006_2_simd2004.csv", guess_max = 150000)

#Matching SIMD ----
#Taking out all spaces in postcode to allow matching of files

SMR06$POSTCODE<- gsub(" ","",SMR06$POSTCODE)
SIMD2016$pc7 <- gsub(" ","",SIMD2016$pc7)
SIMD2012$pc7 <- gsub(" ","",SIMD2012$pc7)
SIMD2009$PC7 %>%  gsub(" ","",.) ->SIMD2009$PC7
SIMD2006$PC7 %>%  gsub(" ","",.) -> SIMD2006$PC7
SIMD2004$PC7 %>%  gsub(" ","",.) -> SIMD2004$PC7


SMR06$INCIDENCE_DATE %>% year() -> SMR06$INCIDENCE_YEAR

SMR06$SIMD1 <- ifelse(SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2004,(with(SIMD2004,simd2004_sc_quintile[match(SMR06$POSTCODE,PC7)])),
                         ifelse(SMR06$INCIDENCE_YEAR>2003 & SMR06$INCIDENCE_YEAR<2007,(with(SIMD2006,simd2006_sc_quintile[match(SMR06$POSTCODE,PC7)])),
                                ifelse(SMR06$INCIDENCE_YEAR>2006 & SMR06$INCIDENCE_YEAR<2010,(with(SIMD2009,simd2009v2_sc_quintile[match(SMR06$POSTCODE,PC7)])),
                                       ifelse(SMR06$INCIDENCE_YEAR>2009 & SMR06$INCIDENCE_YEAR<2014,(with(SIMD2012,simd2012_sc_quintile[match(SMR06$POSTCODE,pc7)])),
                                              ifelse(SMR06$INCIDENCE_YEAR>2013,with(SIMD2016,simd2016_sc_quintile[match(SMR06$POSTCODE,pc7)]),NA)
                                       ))))

SMR06$SIMD1 %>% as.numeric()->SMR06$SIMD1

#SIMD descriptions changed after 2006, recoding to ensure consistency

SMR06 %>%
  mutate(SIMD = case_when(
    SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2007 & SIMD1 == 1 ~ 5,
    SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2007 & SIMD1 == 2 ~ 4,
    SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2007 & SIMD1 == 3 ~ 3,
    SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2007 & SIMD1 == 4 ~ 2,
    SMR06$INCIDENCE_YEAR>1995 & SMR06$INCIDENCE_YEAR<2007 & SIMD1 == 5 ~ 1,
    TRUE ~ SIMD1
  )
  ) -> SMR06


#Applying lables

SMR06$SIMD %>%
  factor() %>%
  fct_recode("1 (most deprived)" = "1",
             "2" = "2",
             "3" = "3",
             "4" = "4",
             "5 (least deprived)" = "5"
  ) %>% as.character() -> SMR06$SIMD




#removing SIMD lookups to save space

rm(SIMD2004,SIMD2006,SIMD2009,SIMD2012,SIMD2016)

#Taking the SIMD closest to date of metastatic diagnosis as some patients have more than one SIMD

SMR06 %>% mutate(timediff=difftime(Met_inc_date,INCIDENCE_DATE, units=c("days")))->SMR06

SMR06 %>% group_by(UPI_NUMBER) %>% 
  summarise(timediff=min(abs(timediff))) %>% 
  left_join(SMR06,by=c("UPI_NUMBER","timediff")) %>% 
  select(UPI_NUMBER,Met_inc_date,INCIDENCE_DATE,INCIDENCE_YEAR,POSTCODE,SIMD,SIMD1)->SIMD



#creating summary table

# SIMD %>% group_by(UPI_NUMBER) %>% 
#   summarise(`Number of SIMD`=n_distinct(SIMD)) %>% view()

SIMD %>% group_by(SIMD) %>% 
  summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`% of cohort`= round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*100,1))  -> SIMD_summary

The postcode from SMR06 is used as SMR06 contains the greatest amount of the patient cohort. However this means that for the majority of patients the SIMD is at time of early breast cancer, not metastatic.

Please be aware that some patients can have more than one record in SMR06 and should take the record closest to date of metastatic diagnosis.

SIMD_summary %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
SIMD Number of patients % of cohort
1 (most deprived) 32 11.6
2 32 11.6
3 43 15.5
4 55 19.9
5 (least deprived) 96 34.7
NA 19 6.9

Height, weight and BMI

#There are many duplicated rows in this medicines utilisation report, there can also be different records of height/weight etc. at different appointment dates. 

read_xlsx("Z:/Oncology Data/Cancer LCDI/Chemocare/MedicineUtilisationReport_06-08-2019_01-06-52_BMI.xlsx") -> Chemocare_BMI

Chemocare_BMI$`Appointment Date` %>% as.Date("%d-%m-%Y") -> Chemocare_BMI$`Appointment Date`

#removing duplicated rows
Chemocare_BMI %>% distinct() -> Chemocare_BMI

#joining with cohort

left_join(Cohort, Chemocare_BMI,by=c("UPI_NUMBER"="CHI")) %>% distinct() -> Chemocare_BMI

#Turn 0 entries into NA

ifelse(Chemocare_BMI$Height==0 & Chemocare_BMI$Weight==0 & Chemocare_BMI$`Surface Area`==0,NA,Chemocare_BMI$Height)->Chemocare_BMI$Height
ifelse(Chemocare_BMI$Height==0 & Chemocare_BMI$Weight==0 & Chemocare_BMI$`Surface Area`==0,NA,Chemocare_BMI$Weight)->Chemocare_BMI$Weight
ifelse(Chemocare_BMI$Height==0 & Chemocare_BMI$Weight==0 & Chemocare_BMI$`Surface Area`==0,NA,Chemocare_BMI$`Surface Area`)->Chemocare_BMI$`Surface Area`
ifelse(Chemocare_BMI$Height==0 & Chemocare_BMI$Weight==0 & Chemocare_BMI$`Surface Area`==0,NA,Chemocare_BMI$`Appointment Date`)->Chemocare_BMI$`Appointment Date`

Chemocare_BMI$`Appointment Date` %>% as.Date(origin="1970-01-01") -> Chemocare_BMI$`Appointment Date`

#Select record closest to met inc date

Chemocare_BMI %>% mutate(timediff=difftime(Met_inc_date,`Appointment Date`,units=c("days"))) -> Chemocare_BMI


Chemocare_BMI %>% 
  mutate(timediff=abs(timediff)) %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(timediff1=min(timediff)) %>% 
  left_join(Chemocare_BMI,by=c("UPI_NUMBER")) %>% 
  mutate(timediff=abs(timediff)) %>% 
  filter(timediff==timediff1) %>% 
  right_join(Cohort)->Chemocare_BMI

#There is one patient who has two weights in the same day, the weights appear to be very close therefore the first weight is taken

Chemocare_BMI$Height %>% as.numeric() ->Chemocare_BMI$Height
Chemocare_BMI$Weight %>% as.numeric() ->Chemocare_BMI$Weight
Chemocare_BMI$`Surface Area` %>% as.numeric() ->Chemocare_BMI$`Surface Area`



Chemocare_BMI %>%   arrange(UPI_NUMBER) %>% 
  group_by(UPI_NUMBER) %>% 
  mutate(rank=rank(Weight)) %>% 
  filter(rank==1) %>% 
  mutate(BMI=round2(Weight/(Height^2),1)) ->Chemocare_BMI
  
#BMI

Chemocare_BMI %>% filter(is.na(BMI)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->NA_BMI
Chemocare_BMI %>% filter(is.na(Height)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->NA_Height
Chemocare_BMI %>% filter(is.na(Weight)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->NA_Weight
Chemocare_BMI %>% filter(is.na(Weight)==1) %>% select(UPI_NUMBER) %>% n_distinct() ->NA_BSA


data.frame(`Measure`=c("Min","Mean","Median","Max","Patients with missing values"),
           `BMI`= round2(c(min(Chemocare_BMI$BMI,na.rm=TRUE),mean(Chemocare_BMI$BMI,na.rm=TRUE),median(Chemocare_BMI$BMI,na.rm=TRUE),max(Chemocare_BMI$BMI,na.rm=TRUE),NA_BMI),1),
           `Height`= round2(c(min(Chemocare_BMI$Height,na.rm=TRUE),mean(Chemocare_BMI$Height,na.rm=TRUE),median(Chemocare_BMI$Height,na.rm=TRUE),max(Chemocare_BMI$Height,na.rm=TRUE),NA_Height),1),
           `Weight`= round2(c(min(Chemocare_BMI$Weight,na.rm=TRUE),mean(Chemocare_BMI$Weight,na.rm=TRUE),median(Chemocare_BMI$Weight,na.rm=TRUE),max(Chemocare_BMI$Weight,na.rm=TRUE),NA_Weight),1),
            `Body Surface Area`= round2(c(min(Chemocare_BMI$`Surface Area`,na.rm=TRUE),mean(Chemocare_BMI$`Surface Area`,na.rm=TRUE),median(Chemocare_BMI$`Surface Area`,na.rm=TRUE),max(Chemocare_BMI$`Surface Area`,na.rm=TRUE),NA_BSA),1)) ->BMI_summary

Height weight and BMI are derived from Chemocare, there can be more than one record per patient therefore the record closest to date of metastatic diagnosis is taken. One potiental issue found is that patients can have more than one weight measurement on the same appointment date. In this case only one patient was found with this anomaly and since the measurements were similar the first record was taken.

If the measurements were quite different or more than one patient had more than one record for the same appointment date, taking the mean would be a potential alternative.

There can be records which have a height, weight and body surface area of zero, these records are removed before the record closest to the date of metastatic diagnosis is selected.

BMI_summary %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Measure BMI Height Weight Body.Surface.Area
Min 19.7 1.6 48.0 1.5
Mean 28.5 1.6 75.0 1.8
Median 28.7 1.6 80.3 1.9
Max 37.0 1.7 91.3 1.9
Patients with missing values 196.0 196.0 196.0 196.0

Outcomes

Survival Analysis

Survival analysis has been calculated for the cohort using the Kaplan-Meier estimate, patients are censored when the time period has ended - 31/12/2017 - and the patient has not passed away. Please note that the x-axis shows the time between diagnosis and death or censoring, therefore patients who are diagnosed at the beginning of 2017 and have not passed away will be censored within year one.

THis plot is interactive, please hover the cursor over plot to see the survival probability, lower and upper confidence intervals.

After 1 year 82% of the cohort has survived and after 4 years 28% have survied.

  library(ggfortify)
  
  left_join(Cohort,NRS_Deaths) %>% 
    select(UPI_NUMBER,Met_inc_date,`Line of Treatment`,`Treatment_category`,first_STDATE,DATE_OF_DEATH) %>% 
    mutate(time_years=difftime(DATE_OF_DEATH,Met_inc_date,units=c("days"))/365.25,
           status=ifelse(is.na(time_years)==1,0,1),
           end_of_period=as.Date("2017-12-31"),
           time_years=ifelse(status==0,difftime(end_of_period,Met_inc_date,units=c("days"))/365.25,time_years),
           time_days=difftime(DATE_OF_DEATH, Met_inc_date, units=c("days"))) ->Survival_analysis
  
  km_fit <- survfit(Surv(time_years, status) ~ 1, data=Survival_analysis)
  
  summary(km_fit, times = c(1:5))->Surv_summary1
  
  cols<- lapply(c(2,6,15,16) , function(x) Surv_summary1[x])
  # Combine the columns into a data frame
  Surv_summary <- do.call(data.frame, cols)
  
  
  library(pacman)
  pacman::p_load(plotly)
  pacman::p_load(GGally)
  pacman::p_load(survival)
  pacman::p_load(cowplot)
  pacman::p_load(broom)
  pacman::p_load_current_gh("sahirbhatnagar/casebase")
  pacman::p_load(Epi)
  "%ni%" <- Negate("%in%")
  
  
  p1 <- GGally::ggsurv(km_fit, main = "Kaplan-Meier Curve for ER+ HER- Metastatic Breast Cancer Patients diagnosed between 2013 and 2017", xlab="Time (Years from metastatic diagnosis)") + theme_bw()


#Plots comparing first line of treatment 
#Excluding patients who have a first line of death or trials. 
#Patients with first line trials are excluded because the numbers are too small
Cohort %>% filter(`Line of Treatment`=="1st") %>% select(c(UPI_NUMBER,Treatment_category,Met_inc_date)) %>% right_join(Survival_analysis,by = c("UPI_NUMBER", "Met_inc_date")) %>%  mutate(time_years=difftime(DATE_OF_DEATH,Met_inc_date,units=c("days"))/365.25,
         status=ifelse(is.na(time_years)==1,0,1),
         end_of_period=as.Date("2017-12-31"),
         time_years=ifelse(status==0,difftime(end_of_period,Met_inc_date,units=c("days"))/365.25,time_years),
         time_days=difftime(DATE_OF_DEATH, Met_inc_date, units=c("days"))) %>% 
  mutate(Treatment_category=ifelse(Treatment_category.x %in% c("No treatment","Death"),"No treatment",Treatment_category.x)) %>% 
  filter(!(Treatment_category %in% c("Trial/other"))) ->Survival_analysis_1st


Survival_analysis_1st$Treatment_category %>% as_factor() -> Survival_analysis_1st$Treatment_category
Survival_analysis_1st %>% rename("First_Treatment"= "Treatment_category") -> Survival_analysis_1st


km_fit_1st <- survfit(Surv(time_years, status) ~ First_Treatment, data=Survival_analysis_1st)
  
  summary(km_fit_1st, times = c(1:5))->Surv_summary1_1st
  
  cols_1st<- lapply(c(2,11,6,16,17) , function(x) Surv_summary1_1st[x])
  # Combine the columns into a data frame
  Surv_summary1_1st <- do.call(data.frame, cols_1st)

Surv_summary1_1st$strata %>% as.factor() %>% 
  fct_recode("Chemotherapy"="First_Treatment=Chemotherapy",
             "Endocrine"="First_Treatment=Endocrine",
             "No treatment"="First_Treatment=No treatment") %>% 
  as.character() -> Surv_summary1_1st$strata
  
Surv_summary1_1st %>% rename("First treatment"="strata") -> Surv_summary1_1st

Surv_summary1_1st %>%  gather(variable,value,-(time:`First treatment`)) %>% 
  unite(temp,`First treatment`,variable) %>%  spread(temp, value) -> Surv_summary1_1st




p2 <- GGally::ggsurv(km_fit_1st, surv.col = c("#41b6c4","#253494","#a1dab4"), main = "Kaplan-Meier Curve for ER+ HER- Metastatic Breast Cancer Patients diagnosed between 2013 and 2017 by first line of treatment",xlab="Time (Years from metastatic diagnosis)") + theme_bw()

  ggplotly(p1) %>% layout(xaxis = list(range=c(0,5),ticks="outside"))
Surv_summary %>% rename("Years"="time",
                        "Survival probability"="surv",
                        "Lower 95% CI"="lower",
                        "Upper 95% CI"="upper") -> Surv_summary

Surv_summary %>% 
  round2(2) %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Years Survival probability Lower 95% CI Upper 95% CI
1 0.82 0.80 0.85
2 0.58 0.54 0.62
3 0.41 0.37 0.45
4 0.28 0.24 0.33

This Kaplan-Meier curve shows the probability of survival by what patients recevied as a first line of treatment. Patients categorised as “no treatment” are those who have no record of treatment on the datasets used in this example.

Please note that for survival analysis this definition of “no treatment” is different to the “no treatment” in the Deriving Line of treatment in ER+ HER2- advanced breast cancer patient report.

The definition of “no treatment” in the Deriving line of treatment report is patients who have no record of treatment in the datasets used but also have not died. This to allow plotting of patient pathways.

After two years the 64% of the cohort that had endocrine therapy as a first line treatment has survived compared to 45% of the patients who had chemotherapy as a first line treatment.

Survival_analysis_1st %>% group_by(First_Treatment) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  rename("First treatment" ="First_Treatment") %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
First treatment Number of patients
Endocrine 189
No treatment 25
Chemotherapy 61
#reorder columns so that endocrine is first

Surv_summary1_1st <- Surv_summary1_1st[c(1,5,6,7,2,3,4,8,9,10)]

Surv_summary1_1st %>% 
  set_names(gsub(".*_","", names(.))) -> Surv_summary1_1st 



Surv_summary1_1st %>% 
  round2(2) %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 3, "Chemotherapy" = 3,"No treatment"=3)) %>% 
  add_header_above(c(" " = 1, "First line of treatment"=9))
First line of treatment
Endocrine
Chemotherapy
No treatment
time lower surv upper lower surv upper lower surv upper
1 0.81 0.84 0.87 0.78 0.83 0.88 0.19 0.34 0.62
2 0.60 0.64 0.69 0.38 0.45 0.52 0.19 0.34 0.62
3 0.44 0.48 0.54 0.19 0.25 0.33 0.11 0.26 0.58
4 0.26 0.31 0.37 0.19 0.25 0.33 0.11 0.26 0.58
ggplotly(p2) %>% layout(xaxis = list(range=c(0,5),ticks="outside")) 

Mortality rates

Thirty and ninety day mortality rates were derived by calculating the time difference in dates between the metastatic incidence date date of death, patients who died within thirty and ninety days are flagged and summarised in the tables below.

There are 8 patients of the 277 advanced breast cancer patients that died within 30 days of diagnosis - overall crude mortality rate of 28.9

Of the 277 patients diagnosed with advanced breast cancer there are 20 patients died within 90 days of diagnosis - overall crude mortality rate of 72.2.

ifelse(Survival_analysis$time_days<=30,1,0)->Survival_analysis$`30daymortality`
ifelse(Survival_analysis$time_days<=90,1,0)->Survival_analysis$`90daymortality`

Survival_analysis %>% filter(is.na(DATE_OF_DEATH)==0) %>% summarise(n_pats=n_distinct(UPI_NUMBER)) ->Total_dead

Survival_analysis %>% filter(`30daymortality`==1) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(`Length`= "30 days",
         `Overall crude mortality rate per 1000`= round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*1000,1))->`30day_npats1`

`30day_npats1`[,c(2,1,3)] ->`30day_npats1`

Survival_analysis %>% filter(`90daymortality`==1) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) %>% 
  mutate(Length="90 days",
         `Overall crude mortality rate per 1000`= round2(`Number of patients`/n_distinct(Cohort$UPI_NUMBER)*1000,1))->`90day_npats1`

`90day_npats1`[,c(2,1,3)] ->`90day_npats1`


rbind(`30day_npats1`,`90day_npats1`) -> Mortality_rate_summary


Mortality_rate_summary %>% 
  kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Length Number of patients Overall crude mortality rate per 1000
30 days 8 28.9
90 days 20 72.2
#30 and 90 day mortality rates by first line of treatment 

ifelse(Survival_analysis_1st$time_days<=30,1,0)->Survival_analysis_1st$`30daymortality`
ifelse(Survival_analysis_1st$time_days<=90,1,0)->Survival_analysis_1st$`90daymortality`




##Denominators

Survival_analysis_1st %>% filter(First_Treatment=="Chemotherapy") %>% 
  select(UPI_NUMBER) %>% n_distinct() ->Total_Chemo_first_pats

Survival_analysis_1st %>% filter(First_Treatment=="Endocrine") %>% 
  select(UPI_NUMBER) %>% n_distinct()  ->Total_endo_first_pats

Survival_analysis_1st %>% filter(First_Treatment=="No treatment") %>% 
  select(UPI_NUMBER) %>% n_distinct() ->Total_notreat_first_pats



### Mortality rates by 1st line of treatment
Survival_analysis_1st %>% filter(`30daymortality`==1) %>%
  group_by(First_Treatment) %>%
  summarise(`30day_Number of patients`=n_distinct(UPI_NUMBER)) ->thirtyday_npats1st_1


  ifelse(thirtyday_npats1st_1$First_Treatment=="Endocrine",round2(thirtyday_npats1st_1$`30day_Number of patients`/Total_endo_first_pats*1000,1),
         ifelse(thirtyday_npats1st_1$First_Treatment=="No treatment",round2(thirtyday_npats1st_1$`30day_Number of patients`/Total_notreat_first_pats*1000,1),NA))-> thirtyday_npats1st_1$`30day_Overall crude mortality rate per 1000`
         



Survival_analysis_1st %>% filter(`90daymortality`==1) %>% group_by(First_Treatment) %>%  summarise(`90day_Number of patients`=n_distinct(UPI_NUMBER))->ninetyday_npats1st_1

  ifelse(ninetyday_npats1st_1$First_Treatment=="Endocrine",round2(ninetyday_npats1st_1$`90day_Number of patients`/Total_endo_first_pats*1000,1),
         ifelse(ninetyday_npats1st_1$First_Treatment=="No treatment",round2(ninetyday_npats1st_1$`90day_Number of patients`/Total_notreat_first_pats*1000,1),
                ifelse(ninetyday_npats1st_1$First_Treatment=="Chemotherapy",round2(ninetyday_npats1st_1$`90day_Number of patients`/Total_Chemo_first_pats*1000,1),NA)))-> ninetyday_npats1st_1$`90day_Overall crude mortality rate per 1000`



full_join(thirtyday_npats1st_1,ninetyday_npats1st_1) ->Mortality_rate_summary_1st


Mortality_rate_summary_1st %>% 
  set_names(gsub(".*_","", names(.))) -> Mortality_rate_summary_1st 




c(Total_endo_first_pats,Total_notreat_first_pats,Total_Chemo_first_pats)->   Mortality_rate_summary_1st$`Total patients on first line of treatment`

Mortality rates by first line of treatment

There are 6 patients that died within 90 days of diagnosis out of 189 first line endocrine therapy patients - overall crude mortality rate of 31.7 .

Of the 61 patients that had chemotherapy as a first line treatment 5 died within 90 days - overall crude mortality rate of 82.

kable(Mortality_rate_summary_1st, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "30 Day" = 2, "90 Day" = 2," "=1))
30 Day
90 Day
Treatment Number of patients Overall crude mortality rate per 1000 Number of patients Overall crude mortality rate per 1000 Total patients on first line of treatment
Endocrine 3 15.9 6 31.7 189
No treatment 5 200.0 9 360.0 25
Chemotherapy NA NA 5 82.0 61

Time to next line of treatment or death (days)

Time to next line ot treatment or death is derived using the time difference between the first start date of each line of treatment, patients moving from 1st to 2nd line of treatment have a median time of 178 days, this appears to decrease until patients move from 5th to 6th treatment where the time begins to increase until the 7th to 8th treatment.

#Calculating time between treatments 
ifelse(Line_of_treatment$Treatment_category=="No treatment",ymd("2017-12-31"),ymd(Line_of_treatment$first_STDATE)) -> Line_of_treatment$first_STDATE

Line_of_treatment$first_STDATE %>% as.Date(origin="1970-01-01") ->Line_of_treatment$first_STDATE

#recalculate days


Line_of_treatment %>%group_by(UPI_NUMBER) %>% 
  arrange(`Line of Treatment`) %>% 
  mutate(Time_between_treatments=as.numeric(abs(difftime(first_STDATE,shift(first_STDATE,1L,type="lead"),units=c("days"))))) ->Line_of_treatment

  Line_of_treatment %>% group_by(`Line of Treatment`) %>% summarise(`Minimum time` = as.numeric(min(Time_between_treatments,na.rm=TRUE)),
                                                                    `Mean time`= as.numeric(mean(Time_between_treatments,na.rm=TRUE)),
                                                                    `Median time`=as.numeric(median(Time_between_treatments,na.rm=TRUE)),
                                                                    `Maximum time`=as.numeric(max(Time_between_treatments,na.rm=TRUE))) ->Time_to_next_lot_summary 

  

Time_to_next_lot_summary[Time_to_next_lot_summary == "-Inf"|Time_to_next_lot_summary=="Inf"] <- NA
Time_to_next_lot_summary[Time_to_next_lot_summary == "NaN"] <-NA

Time_to_next_lot_summary$`Mean time` %>% round2(0) ->Time_to_next_lot_summary$`Mean time`
Time_to_next_lot_summary$`Median time` %>% round2(0) ->Time_to_next_lot_summary$`Median time`

Time_to_next_lot_summary %>% filter(`Line of Treatment`!="9th") ->Time_to_next_lot_summary

Time_to_next_lot_summary$`Line of Treatment` %>% as.factor() %>% 
  fct_recode("1st to 2nd"="1st",
             "2nd to 3rd"="2nd",
             "3rd to 4th"="3rd",
             "4th to 5th"="4th",
             "5th to 6th"="5th",
             "6th to 7th"="6th",
             "7th to 8th"="7th",
             "8th to 9th"="8th") %>% 
  as.character()-> Time_to_next_lot_summary$`Line of Treatment`



#Length of treatment broken down by 1st line of treatment 

Cohort %>% filter(`Line of Treatment`=="1st") %>% 
  rename("1st_LOT"=`Treatment_category`) %>% 
  select(UPI_NUMBER,`1st_LOT`) %>% right_join(Line_of_treatment, by=c("UPI_NUMBER")) %>% group_by(UPI_NUMBER) %>% 
  arrange(`Line of Treatment`) %>% 
  mutate(Time_between_treatments=abs(difftime(first_STDATE,shift(first_STDATE,1L,type="lead"),units=c("days")))) -> Length_of_treatment_1st

#Endocrine

 Length_of_treatment_1st %>% 
   filter(`1st_LOT`=="Endocrine") %>% 
   group_by(`Line of Treatment`) %>% 
   summarise(`endo_Minimum time` = as.numeric(min(Time_between_treatments,na.rm=TRUE)),
             `endo_Mean time`= as.numeric(mean(Time_between_treatments,na.rm=TRUE)),
             `endo_Median time`=as.numeric(median(Time_between_treatments,na.rm=TRUE)),
             `endo_Maximum time`=as.numeric(max(Time_between_treatments,na.rm=TRUE))) -> Time_to_next_lot_1st_endo
 
Time_to_next_lot_1st_endo[Time_to_next_lot_1st_endo == "-Inf"|Time_to_next_lot_1st_endo=="Inf"] <- NA
Time_to_next_lot_1st_endo[Time_to_next_lot_1st_endo == "NaN"] <-NA

Time_to_next_lot_1st_endo$`endo_Mean time` %>% round2(0) ->Time_to_next_lot_1st_endo$`endo_Mean time`
Time_to_next_lot_1st_endo$`endo_Median time` %>% round2(0) ->Time_to_next_lot_1st_endo$`endo_Median time`

Time_to_next_lot_1st_endo %>% filter(`Line of Treatment`!="8th") ->Time_to_next_lot_1st_endo

Time_to_next_lot_1st_endo$`Line of Treatment` %>% as.factor() %>% 
  fct_recode("1st to 2nd"="1st",
             "2nd to 3rd"="2nd",
             "3rd to 4th"="3rd",
             "4th to 5th"="4th",
             "5th to 6th"="5th",
             "6th to 7th"="6th",
             "7th to 8th"="7th") %>% 
  as.character()-> Time_to_next_lot_1st_endo$`Line of Treatment`



##Chemotherapy

 Length_of_treatment_1st %>% 
   filter(`1st_LOT`=="Chemotherapy") %>% 
   group_by(`Line of Treatment`) %>% 
   summarise(`chemo_Minimum time` = as.numeric(min(Time_between_treatments,na.rm=TRUE)),
             `chemo_Mean time`= as.numeric(mean(Time_between_treatments,na.rm=TRUE)),
             `chemo_Median time`=as.numeric(median(Time_between_treatments,na.rm=TRUE)),
             `chemo_Maximum time`=as.numeric(max(Time_between_treatments,na.rm=TRUE))) -> Time_to_next_lot_1st_chemo
   
Time_to_next_lot_1st_chemo[Time_to_next_lot_1st_chemo == "-Inf"|Time_to_next_lot_1st_chemo=="Inf"] <- NA
Time_to_next_lot_1st_chemo[Time_to_next_lot_1st_chemo == "NaN"] <-NA

Time_to_next_lot_1st_chemo$`chemo_Mean time` %>% round2(0) ->Time_to_next_lot_1st_chemo$`chemo_Mean time`
Time_to_next_lot_1st_chemo$`chemo_Median time` %>% round2(0) ->Time_to_next_lot_1st_chemo$`chemo_Median time`

Time_to_next_lot_1st_chemo %>% filter(`Line of Treatment`!="9th") ->Time_to_next_lot_1st_chemo

Time_to_next_lot_1st_chemo$`Line of Treatment` %>% as.factor() %>% 
  fct_recode("1st to 2nd"="1st",
             "2nd to 3rd"="2nd",
             "3rd to 4th"="3rd",
             "4th to 5th"="4th",
             "5th to 6th"="5th",
             "6th to 7th"="6th",
             "7th to 8th"="7th",
             "8th to 9th"="8th") %>% 
  as.character()-> Time_to_next_lot_1st_chemo$`Line of Treatment`
 
 
#Trial  
 
    
 Length_of_treatment_1st %>% 
   filter(`1st_LOT`=="Trial/other") %>% 
   group_by(`Line of Treatment`) %>% 
   summarise(`trial_Minimum time` = as.numeric(min(Time_between_treatments,na.rm=TRUE)),
             `trial_Mean time`= as.numeric(mean(Time_between_treatments,na.rm=TRUE)),
             `trial_Median time`=as.numeric(median(Time_between_treatments,na.rm=TRUE)),
             `trial_Maximum time`=as.numeric(max(Time_between_treatments,na.rm=TRUE))) -> Time_to_next_lot_1st_trial

Time_to_next_lot_1st_trial[Time_to_next_lot_1st_trial == "-Inf"|Time_to_next_lot_1st_trial=="Inf"] <- NA
Time_to_next_lot_1st_trial[Time_to_next_lot_1st_trial == "NaN"] <-NA

Time_to_next_lot_1st_trial$`trial_Mean time` %>% round2(0) ->Time_to_next_lot_1st_trial$`trial_Mean time`
Time_to_next_lot_1st_trial$`trial_Median time` %>% round2(0) ->Time_to_next_lot_1st_trial$`trial_Median time`

Time_to_next_lot_1st_trial %>% filter(`Line of Treatment`!="5th") ->Time_to_next_lot_1st_trial

Time_to_next_lot_1st_trial$`Line of Treatment` %>% as.factor() %>% 
  fct_recode("1st to 2nd"="1st",
             "2nd to 3rd"="2nd",
             "3rd to 4th"="3rd",
             "4th to 5th"="4th") %>% 
  as.character()-> Time_to_next_lot_1st_trial$`Line of Treatment`   
 
full_join(Time_to_next_lot_1st_endo,Time_to_next_lot_1st_chemo)  ->Time_to_next_lot_1st_summary
   

Time_to_next_lot_1st_summary %>% 
  set_names(gsub(".*_","", names(.))) -> Time_to_next_lot_1st_summary 


Time_to_next_lot_summary %>%   kable() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") 
Line of Treatment Minimum time Mean time Median time Maximum time
1st to 2nd 0 313 178 1774
2nd to 3rd 13 240 162 1455
3rd to 4th 1 205 131 1252
4th to 5th 12 156 98 574
5th to 6th 9 147 103 493
6th to 7th 27 143 132 356
7th to 8th 6 77 57 186
8th to 9th 24 24 24 24

Time to next line of treatment by first line of treatment

It appears that the median time to next treatment of patients who had endocrine therapy decreases until the 5th to 6th treatment and then increases for the 6th to 7th treatment.

However patients who had a first line treatment of chemotherapy appear to fluctuate between lines of treatment.

kable(Time_to_next_lot_1st_summary, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "1st line Endocrine" = 4, "1st Line Chemotherapy" = 4))
1st line Endocrine
1st Line Chemotherapy
Line of Treatment Minimum time Mean time Median time Maximum time Minimum time Mean time Median time Maximum time
1st to 2nd 0 370 232 1774 8 137 127 683
2nd to 3rd 16 222 154 1378 13 284 169 1455
3rd to 4th 1 227 134 1252 6 169 126 804
4th to 5th 12 175 115 574 28 128 98 371
5th to 6th 9 171 116 493 70 102 102 177
6th to 7th 27 160 132 356 49 122 138 161
7th to 8th 6 6 6 6 34 100 80 186
8th to 9th NA NA NA NA 24 24 24 24

Inpatient attendances and time spent in hospital while on treatment

Inpatient stays are defined using data from SMR01, chemotherapy and injection day cases are excluded. The assumption is made that day cases for injections are for fulvestrant or denosumab. Daycases with the following OPCS4 codes are excluded from the analysis:

  • UNSPECIFIED DELIVERY OF CHEMOTHERAPY FOR NEOPLASM X729
  • UNSPECIFIED DELIVERY OF ORAL CHEMOTHERAPY FOR NEOPLASM X739
  • INTRAVENOUS CHEMOTHERAPY X352
  • DELIVERY OF EXCLUSIVELY ORAL CHEMOTHERAPY FOR NEOPLASM X731

Daycases are identifed as records with a length of stay of 0, the inpatient daycase marker is not used as there are some cases where the marker is positive but the length of stay is 0.

The length of stay while on treatment is defined by identifying the admission dates that occur inbetween the start date of the lines of treatment e.g. the admission dates between the start date for 1st and 2nd line of treatment.

The length of stay in days is then calculated by summing the “length of stay” variable for those inpatient admissions occuring between the start date for the treatment.

This length of stay is then divided by the length of treatment in days to derive the percentage treatment time spent in hospital. Please note that there are some admissions which last longer than the treatment time, for simplicity these results are changed to 100%.

# SMR01colpos_burden <- read_excel("Z:/Oncology Data/Cancer LCDI/SMR/SMR01B col names pos burden.xlsx")
# 
# 
# SMR01B_from_burden <- SMR01colpos_burden$Position_From
# SMR01B_to_burden<-SMR01colpos_burden$Position_To
# SMR01B_names_burden<-SMR01colpos_burden$Field_name
# 
# SMR01B_burden <- read_fwf("N:/Upload/Linked Data/Raw Data/S_mrl_smr01b.dat", fwf_positions(SMR01B_from_burden,SMR01B_to_burden,
#                                                                                      col_names = SMR01B_names_burden),guess_max =500000)
# 
# SMR01B_burden %>% filter(UPI_NUMBER %in% Cohort$UPI_NUMBER) ->SMR01B_burden
# 
# SMR01B_burden$ADMISSION_DATE %>% ymd() ->SMR01B_burden$ADMISSION_DATE
# 
# 
# SMR01B_burden %>% filter(ADMISSION_DATE>"2012-12-31" & ADMISSION_DATE<="2017-12-31") ->SMR01_burden
# 
# SMR01_burden %>% write_xlsx("SMR01_burden.xlsx")

read_xlsx("SMR01_burden.xlsx",guess_max = 2000) -> SMR01_burden

Line_of_treatment %>% select(UPI_NUMBER) %>% left_join(SMR01_burden,by=c("UPI_NUMBER")) -> SMR01_burden


Line_of_treatment %>% 
select(UPI_NUMBER, `Line of Treatment`,first_STDATE)  %>%   spread(key=`Line of Treatment`,value=first_STDATE) %>% ungroup() ->LOT_Burden

  
full_join(LOT_Burden, SMR01_burden) %>% ungroup()  -> LOT_Burden

read_xlsx("opcs4.xlsx") ->OPCS4_lookup


#Matching the OPCS4 codes 

LOT_Burden$MAIN_OPERATION_A_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$MAIN_OPERATION_A,code)]))
LOT_Burden$MAIN_OPERATION_B_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$MAIN_OPERATION_B,code)]))
LOT_Burden$OTHER_OPERATION_1_A_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_1_A,code)]))
LOT_Burden$OTHER_OPERATION_1_B_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_1_B,code)]))
LOT_Burden$OTHER_OPERATION_2_A_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_2_A,code)]))
LOT_Burden$OTHER_OPERATION_2_B_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_2_B,code)]))
LOT_Burden$OTHER_OPERATION_3_A_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_3_A,code)]))
LOT_Burden$OTHER_OPERATION_3_B_desc <- (with(OPCS4_lookup,descrip[match(LOT_Burden$OTHER_OPERATION_3_B,code)]))

#Identify records who are chemotherapy daycases and injection daycases as these are most likely for denosumab or fulvestrant

# UNSPECIFIED DELIVERY OF CHEMOTHERAPY FOR NEOPLASM X729
# UNSPECIFIED DELIVERY OF ORAL CHEMOTHERAPY FOR NEOPLASM    X739
# INTRAVENOUS CHEMOTHERAPY  X352
# DELIVERY OF EXCLUSIVELY ORAL CHEMOTHERAPY FOR NEOPLASM    X731

#Dont use inpatient daycase marker as it appears that there are some patients marked with a daycase identifier of I but have a length of stay of 0

ifelse(LOT_Burden$LENGTH_OF_STAY==0  &
                        (LOT_Burden$MAIN_OPERATION_A %in% c("X729","X739","X352","X731","X388")|
                           LOT_Burden$MAIN_OPERATION_B %in% c("X729","X739","X352","X731","X388")|
                        LOT_Burden$OTHER_OPERATION_1_A %in% c("X729","X739","X352","X731","X388")|
                          LOT_Burden$OTHER_OPERATION_1_B %in% c("X729","X739","X352","X731","X388")|
                          LOT_Burden$OTHER_OPERATION_2_A %in% c("X729","X739","X352","X731","X388")|
                        LOT_Burden$OTHER_OPERATION_2_B %in% c("X729","X739","X352","X731","X388")|
                            LOT_Burden$OTHER_OPERATION_3_A %in% c("X729","X739","X352","X731","X388")|
                            LOT_Burden$OTHER_OPERATION_3_B %in% c("X729","X739","X352","X731","X388")),1,0)-> LOT_Burden$Chemotherapy_Daycase



#Remove the chemotherapy daycase records from the dataset as these should not count towards length of stay or number of admissions 

LOT_Burden %>% filter(Chemotherapy_Daycase==0) -> LOT_Burden

#checking that the data has been filtered correctly.


LOT_Burden$ADMISSION_DATE %>% ymd() ->LOT_Burden$ADMISSION_DATE

LOT_Burden %>% select(-c(ADMISSION,DISCHARGE,DISCHARGE_TYPE,DISCHARGE_TRANSFER_TO,DISCHARGE_TRANSFER_TO_LOCATION,MAIN_CONDITION,OTHER_CONDITION_1,OTHER_CONDITION_2,OTHER_CONDITION_3,OTHER_CONDITION_4,OTHER_CONDITION_5)) ->LOT_Burden

#9th treatment is no further record of treatment

ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`1st` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`2nd`,1,0)->LOT_Burden$ADMISSION_1st_2nd_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`2nd` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`3rd`,1,0)->LOT_Burden$ADMISSION_2nd_3rd_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`3rd` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`4th`,1,0)->LOT_Burden$ADMISSION_3rd_4th_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`4th` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`5th`,1,0)->LOT_Burden$ADMISSION_4th_5th_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`5th` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`6th`,1,0)->LOT_Burden$ADMISSION_5th_6th_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`6th` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`7th`,1,0)->LOT_Burden$ADMISSION_6th_7th_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`7th` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`8th`,1,0)->LOT_Burden$ADMISSION_7th_8th_LOT
ifelse(LOT_Burden$ADMISSION_DATE>=LOT_Burden$`8th` & LOT_Burden$ADMISSION_DATE<LOT_Burden$`9th`,1,0)->LOT_Burden$ADMISSION_8th_9th_LOT

#This accounts for patients who do not have a record of an inpatient attendance at all. 

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`1st`)==0 & is.na(LOT_Burden$`2nd`)==0,99,LOT_Burden$ADMISSION_1st_2nd_LOT)->LOT_Burden$ADMISSION_1st_2nd_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`2nd`)==0 & is.na(LOT_Burden$`3rd`)==0,99,LOT_Burden$ADMISSION_2nd_3rd_LOT)->LOT_Burden$ADMISSION_2nd_3rd_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`3rd`)==0 & is.na(LOT_Burden$`4th`)==0,99,LOT_Burden$ADMISSION_3rd_4th_LOT)->LOT_Burden$ADMISSION_3rd_4th_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`4th`)==0 & is.na(LOT_Burden$`5th`)==0,99,LOT_Burden$ADMISSION_4th_5th_LOT)->LOT_Burden$ADMISSION_4th_5th_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`5th`)==0 & is.na(LOT_Burden$`6th`)==0,99,LOT_Burden$ADMISSION_5th_6th_LOT)->LOT_Burden$ADMISSION_5th_6th_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`6th`)==0 & is.na(LOT_Burden$`7th`)==0,99,LOT_Burden$ADMISSION_6th_7th_LOT)->LOT_Burden$ADMISSION_6th_7th_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`7th`)==0 & is.na(LOT_Burden$`8th`)==0,99,LOT_Burden$ADMISSION_7th_8th_LOT)->LOT_Burden$ADMISSION_7th_8th_LOT

ifelse(is.na(LOT_Burden$ADMISSION_DATE)==1 & is.na(LOT_Burden$`8th`)==0 & is.na(LOT_Burden$`9th`)==0,99,LOT_Burden$ADMISSION_8th_9th_LOT)->LOT_Burden$ADMISSION_8th_9th_LOT

#Calculate the number and % of patients who have an inpatient record during their lines of treatment 

LOT_Burden %>% left_join(Line_of_treatment) %>% ungroup() -> LOT_Burden
  



LOT_Burden %>%  filter(ADMISSION_1st_2nd_LOT==1 & `Line of Treatment`=="1st") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "1st to 2nd") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_1stline

#taking all the patients who had a first line treatment and then joining them to the patients who had a first line treatment and an inpatient record

#1st to 2nd

#This should now include patients who have a first line treatment but no inpatient records 
#Need to set the length of stay and number of attendances to 0
#Need to set the line of treatment as `1st to 2nd` and time between treatments (days) to time between treatments 

Length_of_treatment_1st %>% filter(`Line of Treatment`=="1st") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_1stline) -> Inpatient_summary_1stline

ifelse(is.na(Inpatient_summary_1stline$`Time between treatments (days)`)==1,Inpatient_summary_1stline$Time_between_treatments,Inpatient_summary_1stline$`Time between treatments (days)`)->Inpatient_summary_1stline$`Time between treatments (days)`


Inpatient_summary_1stline$`Line of treatment`<-"1st to 2nd"

Inpatient_summary_1stline$`Length of stay (days)`[is.na(Inpatient_summary_1stline$`Length of stay (days)`)] <- 0

Inpatient_summary_1stline$`Number of inpatient attendances` [is.na(Inpatient_summary_1stline$`Number of inpatient attendances` )] <- 0

#2nd to 3rd 

LOT_Burden %>% 
  filter(ADMISSION_2nd_3rd_LOT==1 & `Line of Treatment`=="2nd") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "2nd to 3rd") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_2ndline

Length_of_treatment_1st %>% filter(`Line of Treatment`=="2nd") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_2ndline) -> Inpatient_summary_2ndline

ifelse(is.na(Inpatient_summary_2ndline$`Time between treatments (days)`)==1,Inpatient_summary_2ndline$Time_between_treatments,Inpatient_summary_2ndline$`Time between treatments (days)`)->Inpatient_summary_2ndline$`Time between treatments (days)`


Inpatient_summary_2ndline$`Line of treatment`<-"2nd to 3rd"

Inpatient_summary_2ndline$`Length of stay (days)`[is.na(Inpatient_summary_2ndline$`Length of stay (days)`)] <- 0

Inpatient_summary_2ndline$`Number of inpatient attendances` [is.na(Inpatient_summary_2ndline$`Number of inpatient attendances` )] <- 0



#3rd to 4th

LOT_Burden %>% 
  filter(ADMISSION_3rd_4th_LOT==1 & `Line of Treatment`=="3rd") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "3rd to 4th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_3rdline


Length_of_treatment_1st %>% filter(`Line of Treatment`=="3rd") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_3rdline) -> Inpatient_summary_3rdline

ifelse(is.na(Inpatient_summary_3rdline$`Time between treatments (days)`)==1,Inpatient_summary_3rdline$Time_between_treatments,Inpatient_summary_3rdline$`Time between treatments (days)`)->Inpatient_summary_3rdline$`Time between treatments (days)`


Inpatient_summary_3rdline$`Line of treatment`<-"3rd to 4th"

Inpatient_summary_3rdline$`Length of stay (days)`[is.na(Inpatient_summary_3rdline$`Length of stay (days)`)] <- 0

Inpatient_summary_3rdline$`Number of inpatient attendances` [is.na(Inpatient_summary_3rdline$`Number of inpatient attendances` )] <- 0




#4th to 5th

LOT_Burden %>%
  filter(ADMISSION_4th_5th_LOT==1 & `Line of Treatment` == "4th") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "4th to 5th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_4thline


Length_of_treatment_1st %>% filter(`Line of Treatment`=="4th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_4thline) -> Inpatient_summary_4thline

ifelse(is.na(Inpatient_summary_4thline$`Time between treatments (days)`)==1,Inpatient_summary_4thline$Time_between_treatments,Inpatient_summary_4thline$`Time between treatments (days)`)->Inpatient_summary_4thline$`Time between treatments (days)`


Inpatient_summary_4thline$`Line of treatment`<-"4th to 5th"

Inpatient_summary_4thline$`Length of stay (days)`[is.na(Inpatient_summary_4thline$`Length of stay (days)`)] <- 0

Inpatient_summary_4thline$`Number of inpatient attendances` [is.na(Inpatient_summary_4thline$`Number of inpatient attendances` )] <- 0



##5th to 6th

LOT_Burden %>% 
  filter(ADMISSION_5th_6th_LOT==1 & `Line of Treatment`=="5th") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "5th to 6th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_5thline

Length_of_treatment_1st %>% filter(`Line of Treatment`=="5th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_5thline) -> Inpatient_summary_5thline

ifelse(is.na(Inpatient_summary_5thline$`Time between treatments (days)`)==1,Inpatient_summary_5thline$Time_between_treatments,Inpatient_summary_5thline$`Time between treatments (days)`)->Inpatient_summary_5thline$`Time between treatments (days)`


Inpatient_summary_5thline$`Line of treatment`<-"5th to 6th"

Inpatient_summary_5thline$`Length of stay (days)`[is.na(Inpatient_summary_5thline$`Length of stay (days)`)] <- 0

Inpatient_summary_5thline$`Number of inpatient attendances` [is.na(Inpatient_summary_5thline$`Number of inpatient attendances` )] <- 0





#6th to 7th

LOT_Burden %>% 
  filter(ADMISSION_6th_7th_LOT==1 & `Line of Treatment`=="6th") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "6th to 7th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_6thline

Length_of_treatment_1st %>% filter(`Line of Treatment`=="6th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_6thline) -> Inpatient_summary_6thline

ifelse(is.na(Inpatient_summary_6thline$`Time between treatments (days)`)==1,Inpatient_summary_6thline$Time_between_treatments,Inpatient_summary_6thline$`Time between treatments (days)`)->Inpatient_summary_6thline$`Time between treatments (days)`


Inpatient_summary_6thline$`Line of treatment`<-"6th to 7th"

Inpatient_summary_6thline$`Length of stay (days)`[is.na(Inpatient_summary_6thline$`Length of stay (days)`)] <- 0

Inpatient_summary_6thline$`Number of inpatient attendances` [is.na(Inpatient_summary_6thline$`Number of inpatient attendances` )] <- 0


#7th to 8th

LOT_Burden %>% 
  filter(ADMISSION_7th_8th_LOT==1 & `Line of Treatment`=="7th") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "7th to 8th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_7thline

Length_of_treatment_1st %>% filter(`Line of Treatment`=="7th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_7thline) -> Inpatient_summary_7thline

ifelse(is.na(Inpatient_summary_7thline$`Time between treatments (days)`)==1,Inpatient_summary_7thline$Time_between_treatments,Inpatient_summary_7thline$`Time between treatments (days)`)->Inpatient_summary_7thline$`Time between treatments (days)`


Inpatient_summary_7thline$`Line of treatment`<-"7th to 8th"

Inpatient_summary_7thline$`Length of stay (days)`[is.na(Inpatient_summary_7thline$`Length of stay (days)`)] <- 0

Inpatient_summary_7thline$`Number of inpatient attendances` [is.na(Inpatient_summary_7thline$`Number of inpatient attendances` )] <- 0


#8th to 9th 

LOT_Burden %>% 
  filter(ADMISSION_8th_9th_LOT==1 & `Line of Treatment`=="8th") %>%
  unique() %>% 
  group_by(UPI_NUMBER) %>% 
  summarise(`Length of stay (days)` = sum(LENGTH_OF_STAY),
            `Time between treatments (days)`=max(Time_between_treatments,na.rm=TRUE),
            `Number of inpatient attendances` = n_distinct(CIS_MARKER)) %>% 
  mutate(`Line of treatment` = "8th to 9th") %>% 
  select(`Line of treatment`,everything())-> Inpatient_summary_8thline


Length_of_treatment_1st %>% filter(`Line of Treatment`=="8th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER,Time_between_treatments) %>% full_join(Inpatient_summary_8thline) -> Inpatient_summary_8thline

ifelse(is.na(Inpatient_summary_8thline$`Time between treatments (days)`)==1,Inpatient_summary_8thline$Time_between_treatments,Inpatient_summary_8thline$`Time between treatments (days)`)->Inpatient_summary_8thline$`Time between treatments (days)`


Inpatient_summary_8thline$`Line of treatment`<-"8th to 9th"

Inpatient_summary_8thline$`Length of stay (days)`[is.na(Inpatient_summary_8thline$`Length of stay (days)`)] <- 0

Inpatient_summary_8thline$`Number of inpatient attendances` [is.na(Inpatient_summary_8thline$`Number of inpatient attendances` )] <- 0





bind_rows(Inpatient_summary_1stline,Inpatient_summary_2ndline,Inpatient_summary_3rdline,Inpatient_summary_4thline,
          Inpatient_summary_5thline,Inpatient_summary_6thline,Inpatient_summary_7thline,Inpatient_summary_8thline) ->Inpatient_summary_alllines



Inpatient_summary_alllines %>%  mutate(`% of treatment time as inpatient`=round2(`Length of stay (days)`/`Time between treatments (days)`*100,1)) ->Inpatient_summary_alllines

#0/0 in R returns NaN, change it to 0 

ifelse(is.nan(Inpatient_summary_alllines$`% of treatment time as inpatient`)==1,0,Inpatient_summary_alllines$`% of treatment time as inpatient`) -> Inpatient_summary_alllines$`% of treatment time as inpatient`


#There are some cases where the length of stay is greater than the length of treatment
#In these cases to make the data appear more understandable the patients who have a % >100 will be changed to 100%

ifelse(Inpatient_summary_alllines$`% of treatment time as inpatient`>100,100,Inpatient_summary_alllines$`% of treatment time as inpatient`) -> Inpatient_summary_alllines$`% of treatment time as inpatient`

#Summaries
#Inpatient attendances

Inpatient_summary_alllines %>% group_by(`Line of treatment`) %>% 
  summarise(`Minimum`=min(`Number of inpatient attendances`),
            `Mean`=round2(mean(`Number of inpatient attendances`),1),
            `Median` = median(`Number of inpatient attendances`),
            `Maximum`= max(`Number of inpatient attendances`)) -> Inpatient_attendances_sum

Inpatient_summary_alllines %>% group_by(`Line of treatment`) %>% 
  summarise(`LOS_Minimum`=min(`Length of stay (days)`),
            `LOS_Mean`=round2(mean(`Length of stay (days)`),1),
            `LOS_Median` = median(`Length of stay (days)`),
            `LOS_Maximum`= max(`Length of stay (days)`)) -> Inpatient_LOS_sum

Inpatient_summary_alllines %>% group_by(`Line of treatment`) %>% 
  summarise(`LOT_Minimum`=min(`Time between treatments (days)`),
            `LOT_Mean`=round2(mean(`Time between treatments (days)`),1),
            `LOT_Median` = median(`Time between treatments (days)`),
            `LOT_Maximum`= max(`Time between treatments (days)`)) -> Inpatient_LOT_sum


Inpatient_summary_alllines %>% group_by(`Line of treatment`) %>% 
  summarise(`%_Minimum`=min(`% of treatment time as inpatient`),
            `%_Mean`=round2(mean(`% of treatment time as inpatient`),1),
            `%_Median` = median(`% of treatment time as inpatient`),
            `%_Maximum`= max(`% of treatment time as inpatient`)) -> Inpatient_percent_inpat_sum

full_join(Inpatient_LOS_sum,Inpatient_LOT_sum) %>% 
  full_join(Inpatient_percent_inpat_sum) -> Inpatient_time_summary

Inpatient_time_summary %>% 
  set_names(gsub(".*_","", names(.))) -> Inpatient_time_summary 

The table below shows the number of inpatient attendances while on line of treatment, 4th to 5th and 6th to 7th appear to be the treatments which have on average the highest number of attendances - 3.2.

kable(Inpatient_attendances_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Number of inpatient attendances" = 4))
Number of inpatient attendances
Line of treatment Minimum Mean Median Maximum
1st to 2nd 0 2.1 1.0 18
2nd to 3rd 0 2.1 1.0 11
3rd to 4th 0 2.1 1.0 15
4th to 5th 0 3.2 2.0 15
5th to 6th 0 2.5 2.0 9
6th to 7th 1 3.2 2.0 6
7th to 8th 0 1.8 1.5 4
8th to 9th 2 2.0 2.0 2

The mean length of stay in days is the longest for the 4th to 5th line of treatment - 11.5 days. The treatment with the greatest mean % of time spent in hospital is also the 4th to 5th line of treatment.

kable(Inpatient_time_summary, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Length of stay (days)" = 4, "Length of treatment (days)" = 4, "% of treatment time as an inpatient"=4))
Length of stay (days)
Length of treatment (days)
% of treatment time as an inpatient
Line of treatment Minimum Mean Median Maximum Minimum Mean Median Maximum Minimum Mean Median Maximum
1st to 2nd 0 9.7 1.0 197 0 312.5 177.5 1774 0 6.4 0.15 73.4
2nd to 3rd 0 9.6 2.0 89 13 240.2 162.0 1455 0 6.5 0.65 100.0
3rd to 4th 0 7.8 0.0 70 1 204.6 131.0 1252 0 8.7 0.00 100.0
4th to 5th 0 11.5 3.0 55 12 155.9 98.0 574 0 13.0 1.70 70.0
5th to 6th 0 7.8 1.0 116 9 147.1 102.5 493 0 5.4 0.75 46.4
6th to 7th 0 10.6 6.0 39 27 143.0 132.0 356 0 10.6 5.30 32.7
7th to 8th 0 3.0 0.5 11 6 76.5 57.0 186 0 4.2 1.45 13.8
8th to 9th 0 0.0 0.0 0 24 24.0 24.0 24 0 0.0 0.00 0.0

Inpatient stay by first line of treatment

For patients who receive endocrine therapy as a first line of treatment the 6th to 7th line of treatment has the highest number of inpatient attendances with a mean of 3.8 and median of 3.

###Break down by first line of treatment

Length_of_treatment_1st %>% select(UPI_NUMBER,`1st_LOT`) %>% unique() %>% 
  right_join(Inpatient_summary_alllines) -> Inpatient_summary_alllines_1sttreat

#Inpatient attendances

Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Endocrine") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`endo_Minimum`=min(`Number of inpatient attendances`),
            `endo_Mean`=round2(mean(`Number of inpatient attendances`),1),
            `endo_Median` = median(`Number of inpatient attendances`),
            `endo_Maximum`= max(`Number of inpatient attendances`)) -> Inpatient_attendances_endo_sum
  
Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Chemotherapy") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`chemo_Minimum`=min(`Number of inpatient attendances`),
            `chemo_Mean`=round2(mean(`Number of inpatient attendances`),1),
            `chemo_Median` = median(`Number of inpatient attendances`),
            `chemo_Maximum`= max(`Number of inpatient attendances`)) -> Inpatient_attendances_chemo_sum
  
  
full_join(Inpatient_attendances_endo_sum,Inpatient_attendances_chemo_sum) -> Inpatient_attendances_sum_1stline

Inpatient_attendances_sum_1stline %>% 
  set_names(gsub(".*_","", names(.))) -> Inpatient_attendances_sum_1stline 


kable(Inpatient_attendances_sum_1stline, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 4, "Chemotherapy" = 4)) %>% 
  add_header_above(c(" " = 1, "Number of inpatient attendances by first line of treatment"=8))
Number of inpatient attendances by first line of treatment
Endocrine
Chemotherapy
Line of treatment Minimum Mean Median Maximum Minimum Mean Median Maximum
1st to 2nd 0 1.8 1 18 0 3.0 2.0 9
2nd to 3rd 0 2.1 1 11 0 2.3 1.5 11
3rd to 4th 0 2.2 1 15 0 2.0 1.0 10
4th to 5th 0 3.0 2 9 0 3.7 3.0 15
5th to 6th 0 2.4 1 9 0 2.7 2.0 5
6th to 7th 2 3.8 3 6 1 2.5 1.5 6
7th to 8th 0 0.0 0 0 0 2.3 3.0 4
8th to 9th NA NA NA NA 2 2.0 2.0 2

The percentage of treatment time spent as an inpatient for first line endocrine patients is highest at 4th to 5th line of treatment, similarly for patients receiving chemotherapy as a first line treatment.

#Length of stay

Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Endocrine") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`endoLOS_Minimum`=min(`Length of stay (days)`),
            `endoLOS_Mean`=round2(mean(`Length of stay (days)`),1),
            `endoLOS_Median` = median(`Length of stay (days)`),
            `endoLOS_Maximum`= max(`Length of stay (days)`)) -> Inpatient_LOS_endo_sum


Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Chemotherapy") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`chemoLOS_Minimum`=min(`Length of stay (days)`),
            `chemoLOS_Mean`=round2(mean(`Length of stay (days)`),1),
            `chemoLOS_Median` = median(`Length of stay (days)`),
            `chemoLOS_Maximum`= max(`Length of stay (days)`)) -> Inpatient_LOS_chemo_sum


full_join(Inpatient_LOS_endo_sum,Inpatient_LOS_chemo_sum) -> Inpatient_LOS_sum_1stline

Inpatient_LOS_sum_1stline %>% 
  set_names(gsub(".*_","", names(.))) -> Inpatient_LOS_sum_1stline 


kable(Inpatient_LOS_sum_1stline, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 4, "Chemotherapy" = 4)) %>% 
  add_header_above(c(" " = 1, "Length of stay (days) by first line of treatment"=8))
Length of stay (days) by first line of treatment
Endocrine
Chemotherapy
Line of treatment Minimum Mean Median Maximum Minimum Mean Median Maximum
1st to 2nd 0 10.0 0.0 197 0 9.0 5.0 57
2nd to 3rd 0 10.0 1.5 89 0 9.1 1.5 84
3rd to 4th 0 7.5 0.0 66 0 8.5 0.0 70
4th to 5th 0 12.4 2.5 55 0 10.5 9.0 35
5th to 6th 0 11.1 1.0 116 0 1.7 1.0 8
6th to 7th 3 12.2 6.0 39 0 8.5 9.0 16
7th to 8th 0 0.0 0.0 0 0 4.0 1.0 11
8th to 9th NA NA NA NA 0 0.0 0.0 0
##Line of treatment


Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Endocrine") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`endoLOT_Minimum`=min(`Time between treatments (days)`),
            `endoLOT_Mean`=round2(mean(`Time between treatments (days)`),1),
            `endoLOT_Median` = median(`Time between treatments (days)`),
            `endoLOT_Maximum`= max(`Time between treatments (days)`)) -> Inpatient_LOT_endo_sum

Inpatient_summary_alllines_1sttreat %>%
  filter(`1st_LOT`=="Chemotherapy") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`chemoLOT_Minimum`=min(`Time between treatments (days)`),
            `chemoLOT_Mean`=round2(mean(`Time between treatments (days)`),1),
            `chemoLOT_Median` = median(`Time between treatments (days)`),
            `chemoLOT_Maximum`= max(`Time between treatments (days)`)) -> Inpatient_LOT_chemo_sum


full_join(Inpatient_LOT_endo_sum,Inpatient_LOT_chemo_sum) -> Inpatient_LOT_sum_1stline


Inpatient_LOT_sum_1stline %>% 
  set_names(gsub(".*_","", names(.))) -> Inpatient_LOT_sum_1stline 


kable(Inpatient_LOT_sum_1stline, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 4, "Chemotherapy" = 4)) %>% 
  add_header_above(c(" " = 1, "Length of treatment (days) by first line of treatment"=8))
Length of treatment (days) by first line of treatment
Endocrine
Chemotherapy
Line of treatment Minimum Mean Median Maximum Minimum Mean Median Maximum
1st to 2nd 0 369.9 232.0 1774 8 136.7 127 683
2nd to 3rd 16 222.4 154.0 1378 13 284.3 169 1455
3rd to 4th 1 227.2 134.0 1252 6 168.8 126 804
4th to 5th 12 174.7 114.5 574 28 128.1 98 371
5th to 6th 9 171.3 116.0 493 70 102.0 102 177
6th to 7th 27 160.2 132.0 356 49 121.5 138 161
7th to 8th 6 6.0 6.0 6 34 100.0 80 186
8th to 9th NA NA NA NA 24 24.0 24 24
##% of treatment time spent in hospital

Inpatient_summary_alllines_1sttreat %>% 
  filter(`1st_LOT`=="Endocrine") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`endo%_Minimum %`=min(`% of treatment time as inpatient`),
            `endo%_Mean %`=round2(mean(`% of treatment time as inpatient`),1),
            `endo%_Median %` = median(`% of treatment time as inpatient`),
            `endo%_Maximum %`= max(`% of treatment time as inpatient`)) -> Inpatient_percent_inpat_endo_sum


Inpatient_summary_alllines_1sttreat %>% 
  filter(`1st_LOT`=="Chemotherapy") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`chemo%_Minimum %`=min(`% of treatment time as inpatient`),
            `chemo%_Mean %`=round2(mean(`% of treatment time as inpatient`),1),
            `chemo%_Median %` = median(`% of treatment time as inpatient`),
            `chemo%_Maximum %`= max(`% of treatment time as inpatient`)) -> Inpatient_percent_inpat_chemo_sum


full_join(Inpatient_percent_inpat_endo_sum,Inpatient_percent_inpat_chemo_sum) -> Inpatient_percent_inpat_1stline_sum

Inpatient_percent_inpat_1stline_sum %>% 
  set_names(gsub(".*_","", names(.))) -> Inpatient_percent_inpat_1stline_sum 


kable(Inpatient_percent_inpat_1stline_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 4, "Chemotherapy" = 4)) %>% 
  add_header_above(c(" " = 1, "% of treatment time as an inpatient by first line of treatment"=8))
% of treatment time as an inpatient by first line of treatment
Endocrine
Chemotherapy
Line of treatment Minimum % Mean % Median % Maximum % Minimum % Mean % Median % Maximum %
1st to 2nd 0.0 5.2 0.0 73.4 0 10.3 4.6 72.2
2nd to 3rd 0.0 6.4 0.8 63.2 0 6.9 0.4 100.0
3rd to 4th 0.0 8.7 0.0 100.0 0 8.9 0.0 77.8
4th to 5th 0.0 12.8 1.7 70.0 0 14.2 13.9 57.1
5th to 6th 0.0 7.1 0.6 46.4 0 2.3 1.0 11.4
6th to 7th 2.3 10.2 5.3 29.6 0 11.0 5.7 32.7
7th to 8th 0.0 0.0 0.0 0.0 0 5.6 2.9 13.8
8th to 9th NA NA NA NA 0 0.0 0.0 0.0

Outpatient attendances

Outpatient attendances are derived using data from SMR00 covering the time period 01/01/2013 - 31/12/2017. Outpatient records which have an clinic date between the first start date of the lines of treatment are summed as the number of outpatient attendances during line of treatment.

Patients on their first line of treatment have the highest average number of outpatient attendances - 6.4.

#Outpatient appointments
# 
# SMR00colpos <- read_excel("Z:/Oncology Data/Cancer LCDI/SMR00/SMR00 positions.xlsx")
# 
# 
# SMR00_from<- SMR00colpos$Position_from
# SMR00_to<-SMR00colpos$Position_to
# SMR00_names<-SMR00colpos$Field_name
# 
# SMR00 <- read_fwf("N:/Upload/Smr/Raw Data/S_SMR00_after.dat", fwf_positions(SMR00_from,SMR00_to,
#                                                                                      col_names = SMR00_names),guess_max =500000)
# 
# SMR00 %>% filter(CI_CHI_NUMBER %in% Cohort$UPI_NUMBER) ->SMR00
# 
# SMR00$CLINIC_DATE %>% ymd() ->SMR00$CLINIC_DATE
# 
# SMR00 %>% filter(CLINIC_DATE>"2012-12-31" & CLINIC_DATE<="2017-12-31") ->SMR00
# 
# SMR00 %>% write_xlsx("SMR00.xlsx")


read_xlsx("SMR00.xlsx",guess_max = 6000) -> SMR00

Line_of_treatment %>% 
select(UPI_NUMBER, `Line of Treatment`,first_STDATE)  %>%   spread(key=`Line of Treatment`,value=first_STDATE) %>% ungroup() %>% 
left_join(SMR00, by=c("UPI_NUMBER"="CI_CHI_NUMBER")) -> SMR00

Length_of_treatment_1st %>% select(UPI_NUMBER,`1st_LOT`,`Line of Treatment`) %>% unique() %>% full_join(SMR00) -> SMR00

ifelse(SMR00$CLINIC_DATE>=SMR00$`1st` & SMR00$CLINIC_DATE<SMR00$`2nd`,1,0)->SMR00$APPOINTMENT_1st_2nd_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`2nd` & SMR00$CLINIC_DATE<SMR00$`3rd`,1,0)->SMR00$APPOINTMENT_2nd_3rd_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`3rd` & SMR00$CLINIC_DATE<SMR00$`4th`,1,0)->SMR00$APPOINTMENT_3rd_4th_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`4th` & SMR00$CLINIC_DATE<SMR00$`5th`,1,0)->SMR00$APPOINTMENT_4th_5th_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`5th` & SMR00$CLINIC_DATE<SMR00$`6th`,1,0)->SMR00$APPOINTMENT_5th_6th_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`6th` & SMR00$CLINIC_DATE<SMR00$`7th`,1,0)->SMR00$APPOINTMENT_6th_7th_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`7th` & SMR00$CLINIC_DATE<SMR00$`8th`,1,0)->SMR00$APPOINTMENT_7th_8th_LOT
ifelse(SMR00$CLINIC_DATE>=SMR00$`8th` & SMR00$CLINIC_DATE<SMR00$`9th`,1,0)->SMR00$APPOINTMENT_8th_9th_LOT

SMR00 %>% group_by(CLINIC_ATTENDANCE) %>% summarise(`Number of patients`=n_distinct(UPI_NUMBER)) -> SMR00_attendance_status

SMR00_attendance_status$CLINIC_ATTENDANCE %>% as.factor() %>% 
  fct_recode("Patient was seen"="1",
             "Patient attended but was not seen (CNW: Could Not Wait)"="5",
             "Patient did not attend and gave no prior warning (DNA)"="8") -> SMR00_attendance_status$CLINIC_ATTENDANCE

#filter smr00 for only attended appointments

SMR00 %>% filter(CLINIC_ATTENDANCE==1)-> SMR00

### 1st to 2nd

SMR00 %>% filter(APPOINTMENT_1st_2nd_LOT==1& `Line of Treatment`=="1st") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="1st to 2nd",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_1st_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="1st") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_1st_sum) -> outpat_1st_sum

outpat_1st_sum$`Line of treatment`<-"1st to 2nd"

outpat_1st_sum$`Number of appointments`[is.na(outpat_1st_sum$`Number of appointments`)] <- 0


### 2nd to 3rd

SMR00 %>% filter(APPOINTMENT_2nd_3rd_LOT==1 &`Line of Treatment`=="2nd") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="2nd to 3rd",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_2nd_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="2nd") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_2nd_sum) -> outpat_2nd_sum

outpat_2nd_sum$`Line of treatment`<-"2nd to 3rd"

outpat_2nd_sum$`Number of appointments`[is.na(outpat_2nd_sum$`Number of appointments`)] <- 0


### 3rd to 4th

SMR00 %>% filter(APPOINTMENT_3rd_4th_LOT==1 &`Line of Treatment`=="3rd") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="3rd to 4th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_3rd_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="3rd") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_3rd_sum) -> outpat_3rd_sum

outpat_3rd_sum$`Line of treatment`<-"3rd to 4th"

outpat_3rd_sum$`Number of appointments`[is.na(outpat_3rd_sum$`Number of appointments`)] <- 0


### 4th to 5th

SMR00 %>% filter(APPOINTMENT_4th_5th_LOT==1 &`Line of Treatment`=="4th") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="4th to 5th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_4th_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="4th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_4th_sum) -> outpat_4th_sum

outpat_4th_sum$`Line of treatment`<-"4th to 5th"

outpat_4th_sum$`Number of appointments`[is.na(outpat_4th_sum$`Number of appointments`)] <- 0


### 5th to 6th

SMR00 %>% filter(APPOINTMENT_5th_6th_LOT==1 &`Line of Treatment`=="5th") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="5th to 6th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_5th_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="5th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_5th_sum) -> outpat_5th_sum

outpat_5th_sum$`Line of treatment`<-"5th to 6th"

outpat_5th_sum$`Number of appointments`[is.na(outpat_5th_sum$`Number of appointments`)] <- 0

### 6th to 7th


SMR00 %>% filter(APPOINTMENT_6th_7th_LOT==1 &`Line of Treatment`=="6th") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="6th to 7th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_6th_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="6th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_6th_sum) -> outpat_6th_sum

outpat_6th_sum$`Line of treatment`<-"6th to 7th"

outpat_6th_sum$`Number of appointments`[is.na(outpat_6th_sum$`Number of appointments`)] <- 0





### 7th to 8th

SMR00 %>% filter(APPOINTMENT_7th_8th_LOT==1 &`Line of Treatment`=="7th") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="7th to 8th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_7th_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="7th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_7th_sum) -> outpat_7th_sum

outpat_7th_sum$`Line of treatment`<-"7th to 8th"

outpat_7th_sum$`Number of appointments`[is.na(outpat_7th_sum$`Number of appointments`)] <- 0

### 8th to 9th

SMR00 %>% filter(APPOINTMENT_8th_9th_LOT==1 &`Line of Treatment`=="8th") %>% 
  group_by(UPI_NUMBER) %>%
  summarise(`Line of treatment`="8th to 9th",
            `Number of appointments`=n_distinct(EPISODE_RECORD_KEY)) -> outpat_8th_sum

Length_of_treatment_1st %>% filter(`Line of Treatment`=="8th") %>% 
  filter(Treatment_category!="Death" & Treatment_category!="No treatment") %>%
  select(UPI_NUMBER) %>% full_join(outpat_8th_sum) -> outpat_8th_sum

outpat_8th_sum$`Line of treatment`<-"8th to 9th"

outpat_8th_sum$`Number of appointments`[is.na(outpat_8th_sum$`Number of appointments`)] <- 0


#All lines

bind_rows(outpat_1st_sum,outpat_2nd_sum,outpat_3rd_sum,outpat_4th_sum,outpat_5th_sum,outpat_6th_sum,outpat_7th_sum) -> outpat_alllines

outpat_alllines %>% group_by(`Line of treatment`) %>% summarise(Minimum=min(`Number of appointments`,na.rm=TRUE),
                                                           Mean=round2(mean(`Number of appointments`,na.rm=TRUE),1),
                                                           Median=median(`Number of appointments`,na.rm=TRUE),
                                                           Maximum=max(`Number of appointments`,na.rm=TRUE)) -> outpat_alllines_1stLOT


kable(outpat_alllines_1stLOT, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Number of outpatient attendances" = 4))
Number of outpatient attendances
Line of treatment Minimum Mean Median Maximum
1st to 2nd 0 6.4 4 40
2nd to 3rd 0 5.4 3 31
3rd to 4th 0 4.7 3 24
4th to 5th 0 4.4 3 15
5th to 6th 0 4.2 4 15
6th to 7th 0 3.4 4 8
7th to 8th 0 1.3 0 5

Outpatient attendances by first line of treatment

Patients with a first line treatment of endocrine therapy have the highest number of outpatient attendances during their first line of treatment with a mean of 7.4 attendances.

However patients receiving chemotherapy as a first line of treatment have the highest number of outpatient attendances during the second line of treatment with a mean of 6.2 attendances.

### Outpatient appointments by first line of treatment

Length_of_treatment_1st %>% select(UPI_NUMBER,`1st_LOT`) %>% unique() %>% right_join(outpat_alllines) ->outpat_sum_1stLOT

outpat_sum_1stLOT %>%
  filter(`1st_LOT`=="Endocrine") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`endo_Minimum`=min(`Number of appointments`),
            `endo_Mean`=round2(mean(`Number of appointments`),1),
            `endo_Median` = median(`Number of appointments`),
            `endo_Maximum`= max(`Number of appointments`)) -> Outpatient_appointments_endo
  
outpat_sum_1stLOT %>%
  filter(`1st_LOT`=="Chemotherapy") %>% 
  group_by(`Line of treatment`) %>% 
  summarise(`chemo_Minimum`=min(`Number of appointments`),
            `chemo_Mean`=round2(mean(`Number of appointments`),1),
            `chemo_Median` = median(`Number of appointments`),
            `chemo_Maximum`= max(`Number of appointments`)) -> Outpatient_appointments_chemo


  full_join(Outpatient_appointments_endo,Outpatient_appointments_chemo) -> Outpatient_appointment_1stline_sum

Outpatient_appointment_1stline_sum %>% 
  set_names(gsub(".*_","", names(.))) -> Outpatient_appointment_1stline_sum 


kable(Outpatient_appointment_1stline_sum, booktabs=TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive","bordered"),full_width=F, position="left") %>%
  add_header_above(c(" " = 1, "Endocrine" = 4, "Chemotherapy" = 4)) %>% 
  add_header_above(c(" " = 1, "Number of outpatient attendances"=8))
Number of outpatient attendances
Endocrine
Chemotherapy
Line of treatment Minimum Mean Median Maximum Minimum Mean Median Maximum
1st to 2nd 0 7.4 5 40 0 3.0 3 10
2nd to 3rd 0 5.1 3 29 0 6.2 3 31
3rd to 4th 0 5.1 3 22 0 4.3 3 24
4th to 5th 0 4.7 3 14 0 4.1 4 15
5th to 6th 0 4.9 5 15 1 2.7 3 4
6th to 7th 0 3.0 2 8 2 4.0 4 6
7th to 8th 0 0.0 0 0 0 1.7 0 5