文献计量学系列30:关键词清洗与同/近义词合并2
内容涵盖文档、作者、期刊、研究机构和国家等相关文献计量学指标分析
更多自定义函数
一次性获取较多文献计量指标
让学习更轻松!
学习力,才是最大的竞争力!扫码约我吧!
一、termExtraction函数简介
termExtraction函数能够从题录数据框中的文本数据(摘要,题目和关键词等)中提取专业术语。


二、关键词清洗与术语提取
# Tabulate the 'DEID' field of the bibliographic data frame `m` (entries are
# split on ';') and coerce the result to a data frame so it can be exported.
# NOTE(review): `m` is defined outside this snippet; tableTag() is presumably
# bibliometrix::tableTag and export() presumably rio::export -- confirm.
DEID <- tableTag(m, Tag = 'DEID', sep = ';') %>%
  as.data.frame(stringsAsFactors = FALSE)

# Write the keyword table to an Excel workbook for manual curation.
export(DEID, file = 'E:/精鼎统计/DEID.xlsx')
然后将导出的关键词按字母排序(如图2)。





# Load the hand-curated synonym table from the 'synonyms' sheet of the
# workbook. Each row maps a variant term (`from`) to its preferred term (`to`).
synonyms_other_data <- import(file = 'DEID.xlsx', which = 'synonyms')
head(synonyms_other_data)
#              from              to
# 1            Acid   Acidification
# 2     Altitudinal        Altitude
# 3       Base flow        Baseflow
# 4 Base flow index        Baseflow
# 5  Biogeochemical Biogeochemistry
# 6        Canadian          Canada
#

# Upper-case both columns so that they can be compared consistently.
from <- toupper(synonyms_other_data$from)
to <- toupper(synonyms_other_data$to)

# Distinct preferred terms; one synonym group is built per entry.
keep.to <- unique(to)

# Build a custom helper function.
#
# Collect every variant that maps to the i-th preferred term and join the
# variants into a single ';'-separated string.
#
# from: character vector of variant terms.
# to:   character vector of preferred terms, parallel to `from`.
# keep: character vector of distinct preferred terms.
# i:    index into `keep` selecting the preferred term to process.
#
# Returns a length-one character string such as "BASE FLOW;BASE FLOW INDEX"
# ("" when no variant maps to keep[i]).
synonfun <- function(from, to, keep, i) {
  fw <- from[which(to == keep[i])]
  # The original pasted an undefined object `a`; the matched variants are `fw`.
  fw1 <- paste0(fw, collapse = ';')
  return(fw1)
}
# Build one ';'-joined string of variants per preferred term in `keep.to`.
# seq_along() yields an empty sequence when `keep.to` is empty (1:length(x)
# would iterate over c(1, 0)), and vapply() guarantees a character vector
# without growing it element by element.
synon <- vapply(
  seq_along(keep.to),
  function(i) synonfun(from, to, keep.to, i),
  character(1)
)
# Prefix each synonym group with its preferred term, giving strings of the
# form "PREFERRED;VARIANT1;VARIANT2" that are passed as `synonyms` below.
synonyms <- paste(keep.to, synon, sep = ';')
head(synonyms)
# [1] "ACIDIFICATION;ACID"
# [2] "ALTITUDE;ALTITUDINAL"
# [3] "BASEFLOW;BASE FLOW;BASE FLOW INDEX"
# [4] "BIOGEOCHEMISTRY;BIOGEOCHEMICAL"
# [5] "CANADA;CANADIAN"
# [6] "CLIMATE CHANGE;CHANGING CLIMATE;CLIMATIC CHANGE"

# First column of the 'keep' and 'remove' sheets: terms to keep as-is and
# terms to drop, respectively.
keepKW <- import(file = 'DEID.xlsx', which = 'keep') %>% .[, 1]
removeKW <- import(file = 'DEID.xlsx', which = 'remove') %>% .[, 1]

# Work on a copy of `m` and overwrite its DE field with the DEID field so that
# term extraction runs on DEID's content.
m1 <- m
m1$DE <- m1$DEID

# termExtraction1 is a custom function that makes only minor modifications to
# termExtraction (see Figure 1); readers can try it themselves.
# NOTE(review): termExtraction1 is not defined in this snippet.
m1_TE <- termExtraction1(
  m1,
  Field = 'DE',
  stemming = FALSE,
  language = 'english',
  keep.terms = keepKW,
  synonyms = synonyms,
  remove.numbers = TRUE,
  remove.terms = removeKW,
  verbose = TRUE
)
# Output shown in the original post (term frequencies):
#   CATCHMENT  RUNOFF  STABLE-ISOTOPE  GROUNDWATER  PRECIPITATION  RUNOFF-GENERATION  TRACER
#         249     165             164          136            120                115     106
#       MODEL   RIVER            SOIL        BASIN  OXYGEN-STABLE-ISOTOPE  DEUTERIUM    FLOW
#         103      98              96           93                     92         85      82
#       STORM  SNOWMELT        FOREST      GLACIER         STREAM  GENERATION  CHEMISTRY
#          82        68            65           60             53          52         51
#    FLOWPATH  TRANSPORT     MOUNTAIN    HEADWATER
#          49         47           45           40
# Remove duplicates: split each record's ';'-separated term string, keep the
# first occurrence of every term, and join the terms back with ';'.
# NOTE(review): an NA entry would come out as the literal string "NA" after
# paste0(); confirm that DE_TM cannot contain NA here.
m1_TE$DE_TM <- lapply(strsplit(m1_TE$DE_TM, ';'), unique) %>%
  lapply(paste0, collapse = ';') %>%
  unlist()
head(m1_TE$DE_TM)
# [1] "HYDROGEOLOGY;KARST;HYDROCHEMISTRY;NATURAL-TRACER;TEMPORAL;DISTRIBUTION;MEDITERRANEAN;SALINE;GROUNDWATER;SR;CARBONATE;AQUIFER;STABLE-ISOTOPE;EVOLUTION;BASIN"
# [2] "STABLE-ISOTOPE;SPATIAL;VARIATION;NAQU;RIVER;BASIN;QINGHAI;xizang;PLATEAU;TEMPORAL;CALABRIA;PRECIPITATION;OXYGEN-STABLE-ISOTOPE;COMPONENT;DEUTERIUM;CHEMISTRY"
# [3] "SUTRI;DHAKA;CHANDRA;BASIN;HIMALAYA;STABLE-ISOTOPE;SPECIFIC;ABLATION;CHHOTA;SHIGRI;GLACIER;MELT;CONTRIBUTION;MASS-BALANCE;UPPER;GANGA;DEUTERIUM;CHORABARI;HIMACHAL;PRADESH;SOURCE;RIVER"
# [4] "RUNOFF-GENERATION;TRACER;FLOWPATH;UNCERTAINTY;THREE-COMPONENT;GROUNDWATER;CATCHMENT;CHEMISTRY;TWO-COMPONENT;DISCHARGE"
# [5] "ACCESSIBLE;RESIDENCE-TIME;QUALITY;CHLORIDE;CATCHMENT;VARIATION;TRANSPORT;DYNAMIC;PRECIPITATION;FOREST"
# [6] "GROUNDWATER;RECHARGE;MONSOON;GLACIER;CLIMATE-CHANGE;COMPONENT;TIANSHAN;MOUNTAIN;STABLE-ISOTOPE;SOIL;MOISTURE;BASIN;PALEOALTIMETRY;FRACTIONATION"
三、小结
赞 (0)
