n2015_1.Rmd

---
title: "n2015.1"
output: word_document
---

```{r setup, include=FALSE}
# Echo each chunk's source code in the rendered document by default.
knitr::opts_chunk$set(
  echo = TRUE
)
```


```{r}
library(haven);library(tidyverse);library(survey)
```


```{r get the core data for Q1/Q3}
# Load the 2015 NIS Core file (Stata format) and list its column names.
df <- "G:/NIS_2015_Core.dta" %>% read_stata()

colnames(df)
```

These data cover the first three quarters (Q1–Q3) of 2015, which are coded in ICD-9 format. The Core file does not contain the diagnosis (DX) and procedure (PR) codes, so we first need to load those codes and merge them with the `df` data frame.


```{r get the data that contains dx and pr codes}

# Load the Q1-Q3 file that carries the diagnosis (DX) and procedure (PR) codes.
dx <- "G:/NIS_2015Q1Q3_DX_PR_GRPS.dta" %>% read_stata()

```


Merging data to get the dx and pr codes into the main dataframe:

```{r we need to merge}

# Left-join the Core records onto the DX/PR table: every row of `dx` (the
# Q1-Q3 discharges) is kept, and Core columns are matched on KEY_NIS.

df1 <- dx %>% left_join(df, by = "KEY_NIS")

df1

```

Now, prior to further analysis, remove the variables that are not needed in this study. 
Also:
  
1. Keep only primary surgery; remove patients with prior CABG 
2. Remove patients with concomitant valve surgery

```{r}

# Work on n15.1 so that df1 itself is left unmodified.
n15.1 <- df1

# V4581 is the diagnosis status code used here to identify prior CABG.
pricabg <- c("V4581") # prior CABG

a <- pricabg

# Scan DX1-DX15 programmatically. The previous hand-written chain tested DX4
# twice and never tested DX9, so a code recorded only in DX9 was missed.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$pricabg <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(pricabg)


# isolate CABG cases from this dataframe.

cabg <- c("3610", "3611", "3612", "3613", "3614", "3615", "3617", "3619")

# NOTE(review): only PR1-PR5 are screened for CABG, whereas the valve screen
# later in this file looks at PR1-PR15 - confirm that this is intentional.
pr_hit <- Reduce(`|`, lapply(n15.1[paste0("PR", 1:5)], function(col) col %in% cabg))

n15.1$cabg <- ifelse(pr_hit, "yes", "no")

n15.1 %>% count(cabg)
```


Keep only CABG cases, then remove patients with prior CABG:
  
```{r remove prior CABG patients}
# Step 1: restrict to discharges flagged as CABG.
df2 <- dplyr::filter(n15.1, cabg == "yes")

# Step 2: among those, drop anyone flagged with a prior CABG.
df3 <- dplyr::filter(df2, pricabg == "no")

dim(df3) # df3 contains only CABG patients with no prior CABG surgery.
```

Remove patients with concomitant valve surgery too:


```{r remove patients undergoing concomitant valve surgery}

# valve replacement / valve repair procedure codes

valve <- c(
  "3511", "3512", "3513", "3514",
  "3521", "3522", "3523", "3524", "3526", "3525", "3527", "3528"
)

a <- valve

# Scan PR1-PR15 programmatically. The previous hand-written chain tested PR4
# twice and never tested PR9, so a valve code recorded only in PR9 was missed
# and that patient stayed in the cohort.
pr_cols <- paste0("PR", 1:15)
valve_hit <- Reduce(`|`, lapply(df3[pr_cols], function(col) col %in% a))

df3$valve <- ifelse(valve_hit, "yes", "no")

df3 %>% count(valve)

# Keep only patients without a concomitant valve procedure.
df4 <- df3 %>% filter(valve == "no")
dim(df4)
```







```{r change name of df to get more variables}

# Reuse the name n15.1 for the filtered cohort (df4); this overwrites the
# earlier, unfiltered n15.1.
n15.1 <- df4

```

```{r prior conditions}

# Flag patients carrying the prior-MI diagnosis code in any of DX1-DX15.
priormi <- c("412") # prior MI

a <- priormi

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$priormi <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(priormi)

```

```{r}
 
# Flag patients carrying the prior-PCI status code in any of DX1-DX15.
priorpci <- c("V4582") # prior PCI

a <- priorpci

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$priorpci <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(priorpci)

```


```{r}
# ICD9 codes for CHF.
# The codes must be wrapped in c(): as.character("4280", "4281", ...) converts
# only its first argument, so the previous code matched "4280" alone.
# NOTE(review): ICD-9-CM 428.2x-428.4x are five-digit codes (e.g. "42820");
# the four-character entries "4282"-"4288" may never match exactly - confirm
# the intended code list.
chf <- c("4280", "4281", "4282", "4283", "4284", "4285", "4286", "4287", "4288")

a <- chf

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$chf <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(chf)
```


```{r}

# Flag patients carrying the shock diagnosis code in any of DX1-DX15.
shock <- c("78551") # shock

a <- shock

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$shock <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(shock)
```

```{r}

# Flag patients carrying the code below in any of DX1-DX15.
# NOTE(review): ICD-9-CM 410.71 is subendocardial infarction, initial episode
# (i.e. NSTEMI); STEMI is normally coded 410.x1 for x = 0-6, 8. Confirm that
# this code list matches the intended definition of the `stemi` flag.
stemi <- c("41071") # stemi

a <- stemi

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(n15.1[dx_cols], function(col) col %in% a))

n15.1$stemi <- ifelse(dx_hit, "yes", "no")

n15.1 %>% count(stemi)

```

```{r change df name to get more variables }
# Carry the cohort forward under the name `df`; this overwrites the Core data
# frame that was read into `df` earlier in the file.
df <- n15.1
```


```{r carotid disease}
# Flag patients carrying the carotid artery disease code in any of DX1-DX15.
carotid.d <- c("43310") # carotid artery disease

a <- carotid.d

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(df[dx_cols], function(col) col %in% a))

df$carotid <- ifelse(dx_hit, "yes", "no")

table(df$carotid)
```

```{r}

# Flag patients carrying a prior-stroke code in any of DX1-DX15.
pristroke <- c("V1254", "4380") # prior stroke

a <- pristroke

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(df[dx_cols], function(col) col %in% a))

df$pristroke <- ifelse(dx_hit, "yes", "no")

table(df$pristroke)
```

```{r}
# Flag patients carrying the prior-ICD-implant status code in any of DX1-DX15.
priicd <- c("V4502") # prior ICD implant

a <- priicd

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(df[dx_cols], function(col) col %in% a))

df$priicd <- ifelse(dx_hit, "yes", "no")

table(df$priicd)
```


```{r}
# Flag patients carrying a dementia code in any of DX1-DX15.
# NOTE(review): matching is exact, so four-character entries will not match
# five-digit subcodes (e.g. "2941" vs "29410") - confirm the code list.
dementia <- c("2900", "2941", "2942", "2948", "3310", "3311", "3312", "33182") # dementia

a <- dementia

# Build the column list programmatically; the previous hand-written chain
# tested DX4 twice and skipped DX9.
dx_cols <- paste0("DX", 1:15)
dx_hit <- Reduce(`|`, lapply(df[dx_cols], function(col) col %in% a))

df$dementia <- ifelse(dx_hit, "yes", "no")

table(df$dementia)

```

```{r get severity and hospital dataframes}

# Load the two auxiliary Stata files that are merged in below:
# the Q1-Q3 severity file and the hospital file.
sev <- "G:/NIS_2015Q1Q3_Severity.dta" %>% read_stata()

hosp <- "G:/NIS_2015_Hospital.dta" %>% read_stata()

```

```{r get severity into the df}



# Attach severity columns by discharge key, then hospital columns by hospital
# key. Base merge() keeps only rows present in both inputs and appends
# .x / .y to non-key columns that occur in both.

df_1 <- merge(x = df, y = sev, by = "KEY_NIS")

df_2 <- merge(x = df_1, y = hosp, by = "HOSP_NIS")

# Lower-case every column name, then list them.
colnames(df_2) <- tolower(colnames(df_2))
colnames(df_2)

```

```{r}
# use of BITA for CABG

bita_codes <- c("3616")

# Kept for backward compatibility with code that reads the `bita` vector.
bita <- bita_codes

# Test pr1-pr5 against the code vector directly rather than inside with():
# once df_2 has a `bita` column, with(df_2, ... %in% bita) resolves `bita` to
# that yes/no column, so re-running the chunk would silently flag every row
# "no".
pr_hit <- Reduce(`|`, lapply(df_2[paste0("pr", 1:5)], function(col) col %in% bita_codes))

df_2$bita <- ifelse(pr_hit, "yes", "no")

table(df_2$bita)
```



```{r save df for now}

# Checkpoint: persist the full merged data frame as CSV.
readr::write_csv(df_2, "H:/bita_nis/df2015.1.csv")

```

```{r}


# Reduce df_2 to the columns kept for analysis; column order is preserved.
# NOTE(review): carotid, pristroke, priicd and dementia are created earlier in
# this file but are not selected here - confirm whether that is intended.
m1 <- dplyr::select(
  df_2,
  # discharge-level fields (".x" = copy from the left side of the merge)
  age, died, discwt.x, dispuniform, drg,
  # diagnosis codes
  dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10,
  dx11, dx12, dx13, dx14, dx15,
  female, key_nis, los, nis_stratum.x, pay1,
  # procedure codes
  pr1, pr2, pr3, pr4, pr5, pr6, pr7, pr8, pr9, pr10,
  pr11, pr12, pr13, pr14, pr15,
  race, totchg, year.x,
  # yes/no flags derived in this file
  cabg, bita, pricabg, valve, priormi, priorpci, chf, shock, stemi,
  # cm_* columns
  cm_aids, cm_alcohol, cm_anemdef, cm_arth, cm_bldloss, cm_chf,
  cm_chrnlung, cm_coag, cm_depress, cm_dm, cm_dmcx, cm_drug, cm_htn_c,
  cm_hypothy, cm_liver, cm_lymph, cm_lytes, cm_mets, cm_neuro, cm_obese,
  cm_para, cm_perivasc, cm_psych, cm_pulmcirc, cm_renlfail, cm_tumor,
  cm_ulcer, cm_valve, cm_wghtloss,
  # hospital-level fields (".y" = copy from the right side of the merge)
  hosp_nis, discwt.y, hosp_bedsize, hosp_locteach, hosp_region,
  nis_stratum.y, year.y
)

colnames(m1)
```

```{r}
# Persist the reduced analysis data frame as CSV.
readr::write_csv(m1, "H:/bita_nis/df/m2015_1.csv")
```