❝诺维医学科研官网:https://www.newboat.top 更新换版中!
B站:文章对应的讲解视频在此。熊大学习社 https://space.bilibili.com/475774512
微信公众号|Bilibili|抖音|小红书|知乎|全网同名:熊大学习社
公益网站,https://nwzz.xyz/ ,内有医学离线数据库、数据提取、科研神器等高质量资料库
诺维之舟AI:https://gpt4.nwzz.xyz 可在线使用GPT4|GPT3.5|Midjourney
课程相关资料:
(1)课程资料包括CHARLS数据库常用变量提取代码讲义。关注公众号“熊大学习社”,回复“charls2406”,获取资料。
我们坚持学以致用,做有质量的分享。关注熊大学习社,您的一键三连是我最大的动力。
(2)一对一论文指导学员免费获取学习课程和专属答疑。了解咨询扫客服二维码。
年龄 age/ 性别 sex / 教育 education/ 婚姻 marriage/ 住所 residence
baseline_demo<-read_dta('2015/Demographic_Background.dta')baseline_demo %>% transmute(ID, age= 2015-ba004_w3_1, gender=ifelse(ba000_w2_3==1,'Male','Female'), education=ifelse(bd001_w2_4>=6,'high school+', ifelse(bd001_w2_4==1, 'Illiterate', ifelse(bd001_w2_4 %in% c(2,3,4), 'primary','middle school'))), marital = ifelse(be001 %in% c(1,2), 'married','single'), residence = ifelse(bb001_w3_2 %in% c(1,2),'urban','rural') )
read_dta('2015/Biomarker.dta')%>%mutate(height = ifelse(qi002>300|qi002<100, NA, qi002), weight = ifelse(ql002>300, NA, ql002), bmi = weight/(height/100)^2)# 肥胖分类obesity = ifelse(bmi>=24,'Yes','No')# 腰围waist = ifelse(qm002>=500,NA,qm002)
# 第一种方法,最大值read_dta('2015/Biomarker.dta')%>%mutate(g1 = ifelse(qc003>300, NA, qc003), g2 = ifelse(qc004>300, NA, qc004), g3 = ifelse(qc005>300, NA, qc005), g4 = ifelse(qc005>300, NA, qc005), grip = pmax(g1,g2,g3,g4,na.rm = T), )# 第二种方法,均值grip_strength = rowMeans(select(., g1:g4), na.rm = TRUE)
# 第一种方式read_dta('2015/health_status_and_functioning.dta') %>% mutate( smoking = ifelse(da059==1, 'yes','no' ))# 第二种方式read_dta('2015/health_status_and_functioning.dta') %>% mutate( smoking = ifelse(da059==2,'Non-smoker', ifelse(da061==2,'Ex-smoker','Smoker')))# 更多信息read_dta('2015/health_status_and_functioning.dta') %>% mutate( # 吸烟分类 smoking = ifelse(da059==2,'Non-smoker', ifelse(da061==2,'Ex-smoker','Smoker')), # 吸烟时间 da065_1, # 吸烟年份 da065_2, # 吸烟时间 smoking_duration = ifelse(!is.na(da065_1),age-da065_1,2011-da065_2), # 吸烟时间分类 smoking_duration_cat = cut(smoking_duration, breaks = c(-Inf, 19, 39, Inf), labels = c("<20", "20-39", ">=40")), # 吸烟量 smoking_num = da063, # 吸烟量分类 smoking_num_cat = cut(smoking_num, breaks = c(-Inf, 19, 39, Inf), labels = c("<20", "20-39", ">=40")), # 开始吸烟年龄 age_str_smk = ifelse(!is.na(da065_1), da065_1, age-(2011-da065_2)), # 开始吸烟年龄分组 age_str_smk_cat = cut(age_str_smk, breaks = c(-Inf, 12, 17,25, Inf), labels = c("<13", "13-17",'18-25', ">=25")), # 结束吸烟年龄 age_quit_smk = ifelse(!is.na(da062_1), da062_1, age-(2011-da062_2)), # 吸烟累计包数:吸烟时间*吸烟量/20根一包 smoking_pac_yr = ifelse(smoking=='Non-smoker',0, smoking_duration*da063/20), # 吸烟包数分类 smoking_pac_yr_cat = cut(smoking_pac_yr, breaks=c(-Inf,0,14,29,Inf), labels = c('0','<15','15-30','>=30')), # 结束吸烟分组 age_quit_smk_cat = case_when( age_quit_smk < 40 ~ "<40", age_quit_smk < 50 ~ "<50", age_quit_smk < 60 ~ "<60", age_quit_smk >= 60 ~ '>=60') )
read_dta('2015/health_status_and_functioning.dta') %>% mutate(drinking = ifelse(da067==1,'yes','no'))
functional limitation [activities of daily living (ADL),功能限制[日常生活活动
instrumental activities of daily living (IADL),日常生活工具活动
# 第一种方法,积分read_dta('2015/Health_Status_and_Functioning.dta')%>%mutate(Func_limt = db010+db011+db012+db013+db014+db015+db016+db017+db018+db019+db020)# ADLADL_disability = ifelse(db010==4|db011==4|db012==4|db013==4|db014==4|db015==4,1,0),# IADLIADL_disability = ifelse(db016==4|db017==4|db018==4|db019==4|db020==4,1,0),
从health_status_and_functioning提取。
# 第一种方法depression = (dc009-1)+(dc010-1)+(dc011-1)+(dc012-1)+(dc014-1)+(dc015-1)+(dc017-1)+(dc018-1)# 第二种方法CESD = as.numeric(dc009)+as.numeric(dc010)+ as.numeric(dc011)+as.numeric(dc012)+(-as.numeric(dc013)+5)+ as.numeric(dc014)+as.numeric(dc015)+(-as.numeric(dc016)+5)+ as.numeric(dc017)+as.numeric(dc018)-10,depression = ifelse(CESD>=10,'Y','N'),
Health_Status_and_Functioning中提取。
health = case_when( da001 %in% c(1,2,3)|da002 %in% c(1,2)~'Good', da001==4|da002==3 ~'Fair', da001==5|da002 %in% c(4,5)~'Poor')
包括身体虚弱、消耗、体重减轻等,从Health_Status_and_Functioning中提取。
weakness=ifelse(db008==1,0,1),snowness = ifelse((db003==1&db005==1)|is.na(db003)|is.na(db005),0,1),exhaustion = ifelse(dc018 %in% c(3,4)|dc012 %in% c(3,4),1,0),lowpa = ifelse(da051_2_==2|da051_3_==2,1,0),weightloss = ifelse(da047==2,1,0)sum_vars = rowSums(select(., weakness, snowness, exhaustion, lowpa, weightloss), na.rm = TRUE),Frailty = ifelse(sum_vars>=3,1,0),
14个,Health_Status_and_Functioning中提取。
# 全部提取read_dta('2015/Health_Status_and_Functioning.dta')%>%mutate(chronic = rowSums(select(., starts_with("zda007_")), na.rm = TRUE))# 分类提取# 心血管疾病,心脏病和中风CVD = ifelse(da007_8_==1|da007_7_==1,'Yes','No')# 心脏病Heartfailure = ifelse(da007_7_==1,'Yes','No')# 中风Stroke = ifelse(da007_8_==1,'Yes','No')# 逐个提取HTN = case_when( zda007_1_==1|da007_1_==1|da007_w2_2_1_==1~'Y', TRUE~'N'),CA = case_when( zda007_4_==1|da007_4_==1|da007_w2_2_4_==1~'Y', TRUE~'N'),CLD = case_when( zda007_5_==1|da007_5_==1|da007_w2_2_5_==1~'Y', TRUE~'N'),HD = case_when( zda007_7_==1|da007_7_==1|da007_w2_2_7_==1~'Y', TRUE~'N'),STK = case_when( zda007_8_==1|da007_8_==1|da007_w2_2_8_==1~'Y', TRUE~'N'),MentalD = case_when( zda007_11_==1|da007_11_==1|da007_w2_2_11_==1~'Y', TRUE~'N'),AR = case_when( zda007_13_==1|da007_13_==1|da007_w2_2_13_==1~'Y', TRUE~'N'),DL = case_when( zda007_2_==1|da007_2_==1|da007_w2_2_2_==1~'Y', TRUE~'N'),LiverD = case_when( zda007_6_==1|da007_6_==1|da007_w2_2_6_==1~'Y', TRUE~'N'),CKD = case_when( zda007_9_==1|da007_9_==1|da007_w2_2_9_==1~'Y', TRUE~'N'),DigestD = case_when( zda007_10_==1|da007_10_==1|da007_w2_2_10_==1~'Y', TRUE~'N'),Asthma = case_when( zda007_14_==1|da007_14_==1|da007_w2_2_14_==1~'Y', TRUE~'N')
从health_status_and_functioning和biomarkers两个表提取。
# 收缩压1sbp1 = ifelse(qa003>300,NA,qa003),# 收缩压2sbp2 = ifelse(qa007>300,NA,qa007),# 收缩压3sbp3 = ifelse(qa011>300,NA,qa011),# 舒张压1dbp1 = ifelse(qa004>200,NA,qa004),# 舒张压2dbp2 = ifelse(qa008>200,NA,qa008),# 舒张压3dbp3 = ifelse(qa012>200,NA,qa012), # 收缩压,均值sbp=(sbp1+sbp2+sbp3)/3,# 舒张压,均值dbp=(dbp1+dbp2+dbp3)/3,# 高血压,HypertensionHypertension = ifelse((sbp<140&dbp<90)&(is.na(da007_1_)|da007_1_!=1)& (is.na(da011s1)|(da011s1!=1))&(is.na(da011s2)|da011s2!=1),'No','Yes')
从Health_status_and_functioning和Blood两个表提取。
# 糖尿病,DiabetesDiabetes = ifelse((newhba1c<6.5)&(is.na(da007_3_)|da007_3_!=1)&(is.na(da014s1)|da014s1!=1)& (is.na(da014s2)|da014s2!=1),'No','Yes')
从Blood两个表提取。
# 血脂症,DyslipidemiaDyslipidemia = ifelse(newcho>=6.19*38.6|newtg>=2.27*88.5|newldl>=4.14*38.6,'Yes','No')
从Demographic_Background和Exit_Interview提取。
# 全因死亡all_cause_mortality = ifelse(!is.na(exb001_1), 1, ifelse(!is.na(eligible),0,NA)),# 生存时间follow_up_time = ifelse(all_cause_mortality==1, exb001_1-2011,2),# 死亡年龄death_age = exb001_1-ba002_1,# 是否早亡premature_death = ifelse((all_cause_mortality==1&(gender=='Male') &(death_age<72.7))| ( all_cause_mortality==1&(gender=='Female')&(death_age<76.9)),1,0)
从Health_status_and_functioning提取。
socialactivity = case_when(da056s12==11~'N',TRUE~'Y')
从Health_status_and_functioning提取。
sleepquality = factor(dc015,levels=c(1,2,3,4), labels=c('Rarely or none of the time', 'Some or a little of the time', 'Occasionally or a moderate amount of the time', 'Most or all of the time'))# 睡眠时间,2013年数据sleephr = da049
从Health_status_and_functioning提取。
# 第一种方法life_satisfy = ifelse(dc028 %in% 1:2,'Good', ifelse(dc028 %in% 3:4,'Fair','Poor')),# 第二种方法life_satisfy = (-as.numeric(dc028))+5,
从Health_status_and_functioning提取。
# 2013年数据hearing = ifelse(da039 %in% 1:3,'Good', ifelse(da039==4,'Fair','Poor')),vision = ifelse(da033 %in% 1:3|da034 %in% 1:3,'Good', ifelse(da039==4|da034 %in% 1:3,'Fair','Poor')),
从Health_status_and_functioning提取。
# 2013年数据pain = ifelse(rowSums(is.na(select(., starts_with("da042s"))))==15,0,1),
从Blood两个表提取。
# 葡萄糖glucose = newglu,# 甘油三脂TG = newtg*0.011,
**VAI ,内脏脂肪指数(VAI)**由人体学测量指标和血脂指标组成。计算方法如图
CVAI = ifelse(gender=='Male', -267.93+0.68*age+0.03*BMI+4*WC+22*log10(TG)-16.32*hdl, -187.32+1.71*age+4.23*BMI+1.12*WC+39.76*log10(TG)-11.66*hdl)
# 第一种方法,2018年数据congnition<-read_dta('2018/Cognition.dta') %>% mutate( mental = (dc001_w4==1)+(dc002_w4==1)+(dc003_w4==1)+(dc005_w4==1)+(dc006_w4==1)+ (dc014_w4_1_1==93)+(dc014_w4_2_1==86)+(dc014_w4_3_1==79)+(dc014_w4_4_1==72)+ (dc014_w4_5_1==65)+(dc024_w4==1), immediate = ((dc028_w4_s1==1)+(dc028_w4_s2==2)+(dc028_w4_s3==3)+(dc028_w4_s4==4)+ (dc028_w4_s5==5)+(dc028_w4_s6==6)+(dc028_w4_s7==7)+(dc028_w4_s8==8)+ (dc028_w4_s9==9)+(dc028_w4_s10==10)+(dc029_w4_s1==1)+(dc029_w4_s2==2)+ (dc029_w4_s3==3)+(dc029_w4_s4==4)+(dc029_w4_s5==5)+(dc029_w4_s6==6)+ (dc029_w4_s7==7)+(dc029_w4_s8==8)+(dc029_w4_s9==9)+(dc029_w4_s10==10))/2, delayed =(dc047_w4_s1==1)+(dc047_w4_s2==2)+(dc047_w4_s3==3)+(dc047_w4_s4==4)+ (dc047_w4_s5==5)+(dc047_w4_s6==6)+(dc047_w4_s7==7)+(dc047_w4_s8==8)+ (dc047_w4_s9==9)+(dc047_w4_s10==10), recall = (immediate+delayed)/2, follow_up_cognition = recall+mental)# 第二种方法,2013年数据VS = ifelse(dc025==1,1,0),delay = (dc027s1 %in% 1)+(dc027s2%in%2)+(dc027s3%in%3)+ (dc027s4%in%4)+(dc027s5%in%5)+(dc027s6%in%6)+(dc027s7%in%7)+ (dc027s8%in%8)+(dc027s9%in%9)+(dc027s10%in%10),immediate =(dc006_1_s1%in%1)+(dc006_1_s2%in%2)+(dc006_1_s3%in%3)+ (dc006_1_s4%in%4)+(dc006_1_s5%in%5)+(dc006_1_s6%in%6)+(dc006_1_s7%in%7)+ (dc006_1_s8%in%8)+(dc006_1_s9%in%9)+(dc006_1_s10%in%10),memory = (immediate+delay)/2,orientation = (dc001s1%in%1)+(dc001s2%in%2)+(dc001s3%in%3)+(dc002%in%1)+ (dc003%in%1), attention = (dc019%in%93)+(dc020%in%86)+(dc021%in%79)+(dc022%in%72)+ (dc023%in%65),cognition = memory+orientation+attention+VS,
课程相关资料:
(1)课程资料包括CHARLS数据库常用变量提取代码讲义。关注公众号“熊大学习社”,回复“charls2406”,获取资料。
我们坚持学以致用,做有质量的分享。关注熊大学习社,您的一键三连是我最大的动力。
(2)一对一专属论文指导,一篇SCI指导直到录用,具体扫码咨询助理老师。