# Build the analysis data set from NHANES: keep participants aged 25 and
# above, derive the recoded covariates, and retain only the analysis columns.
df <- NHANES |>
  # The analysis is restricted to people aged 25 and above
  filter(Age >= 25) |>
  mutate(
    # Ten-year age bands. case_when() assigns the first condition that
    # matches, so each upper bound implicitly carries the lower bound of the
    # band before it; the filter above means the youngest band starts at 25.
    agecat = case_when(
      Age < 35 ~ "25-34",
      Age < 45 ~ "35-44",
      Age < 55 ~ "45-54",
      Age < 65 ~ "55-64",
      Age < 75 ~ "65-74",
      Age >= 75 ~ "75+"
    ),
    # College Grad should be the reference category for education, so the
    # factor levels are reversed relative to their order in the NHANES dataset
    Education = factor(
      Education,
      levels = rev(levels(NHANES$Education))
    ),
    # Collapse Race1 into four groups: Hispanic and Mexican are merged into
    # "Hispanic", Asian and Other into "Other Non-Hispanic". The conditions
    # are mutually exclusive; they are listed in the same order as the factor
    # levels, which puts "White Non-Hispanic" first.
    racecat = factor(
      case_when(
        Race1 == "White" ~ "White Non-Hispanic",
        Race1 == "Black" ~ "Black Non-Hispanic",
        Race1 %in% c("Hispanic", "Mexican") ~ "Hispanic",
        Race1 %in% c("Asian", "Other") ~ "Other Non-Hispanic"
      ),
      levels = c(
        "White Non-Hispanic",
        "Black Non-Hispanic",
        "Hispanic",
        "Other Non-Hispanic"
      )
    )
  ) |>
  # Keep just the variables used in the analysis
  select(
    ID, SurveyYr, Gender, Age, agecat,
    Education, racecat, BPSysAve, SmokeNow
  )