文献计量学系列30:关键词清洗与同/近义词合并2
内容涵盖文档、作者、期刊、研究机构和国家等相关文献计量学指标分析
更多自定义函数
一次性获取较多文献计量指标
让学习更轻松!
学习力,才是最大的竞争力!扫码约我吧!
一、termExtraction函数简介
termExtraction函数能够从题录数据框中的文本数据(摘要,题目和关键词等)中提取专业术语。
二、关键词清洗与术语提取
# Tabulate the DEID field of `m` with tableTag(), splitting entries on ";",
# and coerce the result to a data frame whose strings stay character.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
DEID <- tableTag(m, Tag = "DEID", sep = ";") %>%
  as.data.frame(stringsAsFactors = FALSE)
# Write the table to an Excel workbook.
# NOTE(review): this writes to an absolute path, while the later import()
# calls read "DEID.xlsx" relative to the working directory -- confirm that
# both refer to the same file.
export(DEID, file = "E:/精鼎统计/DEID.xlsx")
然后将导出的关键词按字母排序(如图2)。
# Read the "synonyms" sheet of the workbook and preview its first six rows.
synonyms_other_data <- import(file = "DEID.xlsx", which = "synonyms")
head(synonyms_other_data, n = 6L)
# from to
# 1 Acid Acidification
# 2 Altitudinal Altitude
# 3 Base flow Baseflow
# 4 Base flow index Baseflow
# 5 Biogeochemical Biogeochemistry
# 6 Canadian Canada
#
# Upper-case both columns of the synonym table.
to <- toupper(synonyms_other_data[["to"]])
from <- toupper(synonyms_other_data[["from"]])
# Distinct target terms, in order of first appearance.
keep.to <- unique(to)
# Custom helper: collect every source term that maps to one target term.
#
# Args:
#   from: character vector of source terms.
#   to:   character vector of target terms, parallel to `from`.
#   keep: character vector of target terms to look up.
#   i:    index into `keep` selecting the target term.
#
# Returns:
#   A single string holding the matching `from` terms joined by ";"
#   ("" when no element of `to` equals `keep[i]`).
synonfun <- function(from, to, keep, i) {
  # which() drops NA comparisons, so missing targets never match.
  matched <- from[which(to == keep[i])]
  # Join the matched terms themselves; the function must not depend on any
  # variable outside its own arguments.
  paste0(matched, collapse = ";")
}
# For each distinct target term, build the ";"-joined string of its source
# terms. seq_along() yields no iterations when keep.to is empty (1:length()
# would iterate over c(1, 0)), and vapply() guarantees exactly one string per
# target without growing a vector inside a loop.
synon <- vapply(
  seq_along(keep.to),
  function(i) synonfun(from, to, keep.to, i),
  character(1)
)
# Prefix each group with its target term: "TARGET;SOURCE1;SOURCE2".
synonyms <- paste(keep.to, synon, sep = ";")
head(synonyms)
# [1] "ACIDIFICATION;ACID" "ALTITUDE;ALTITUDINAL"
# [3] "BASEFLOW;BASE FLOW;BASE FLOW INDEX" "BIOGEOCHEMISTRY;BIOGEOCHEMICAL"
# [5] "CANADA;CANADIAN" "CLIMATE CHANGE;CHANGING CLIMATE;CLIMATIC CHANGE"
# First column of the "keep" sheet of the workbook.
keepKW <- import(file = "DEID.xlsx", which = "keep")[, 1]
# First column of the "remove" sheet of the workbook.
removeKW <- import(file = "DEID.xlsx", which = "remove")[, 1]
# Copy `m` and overwrite its DE column with the DEID column.
m1 <- m
m1$DE <- m1$DEID
# The custom function termExtraction1 is termExtraction with only a few small
# modifications (see figure 1); readers can try it themselves.
m1_TE <- termExtraction1(
  m1,
  Field = "DE",
  language = "english",
  stemming = FALSE,
  remove.numbers = TRUE,
  keep.terms = keepKW,
  remove.terms = removeKW,
  synonyms = synonyms,
  verbose = TRUE
)
# CATCHMENT RUNOFF STABLE-ISOTOPE GROUNDWATER PRECIPITATION RUNOFF-GENERATION TRACER
# 249 165 164 136 120 115 106
# MODEL RIVER SOIL BASIN OXYGEN-STABLE-ISOTOPE DEUTERIUM FLOW
# 103 98 96 93 92 85 82
# STORM SNOWMELT FOREST GLACIER STREAM GENERATION CHEMISTRY
# 82 68 65 60 53 52 51
# FLOWPATH TRANSPORT MOUNTAIN HEADWATER
# 49 47 45 40
# Remove duplicate terms within each record: split DE_TM on ";", keep the
# first occurrence of every term, and join the terms back with ";".
# vapply() always returns a character vector, so a zero-row table keeps its
# DE_TM column (unlist() of an empty list is NULL, which would delete it).
m1_TE$DE_TM <- vapply(
  strsplit(m1_TE$DE_TM, ";"),
  function(terms) {
    # A missing record stays NA rather than becoming the literal string "NA".
    if (length(terms) == 1L && is.na(terms)) {
      return(NA_character_)
    }
    paste0(unique(terms), collapse = ";")
  },
  character(1)
)
head(m1_TE$DE_TM)
# [1] "HYDROGEOLOGY;KARST;HYDROCHEMISTRY;NATURAL-TRACER;TEMPORAL;DISTRIBUTION;MEDITERRANEAN;SALINE;GROUNDWATER;SR;CARBONATE;AQUIFER;STABLE-ISOTOPE;EVOLUTION;BASIN"
# [2] "STABLE-ISOTOPE;SPATIAL;VARIATION;NAQU;RIVER;BASIN;QINGHAI;TIBET;PLATEAU;TEMPORAL;CALABRIA;PRECIPITATION;OXYGEN-STABLE-ISOTOPE;COMPONENT;DEUTERIUM;CHEMISTRY"
# [3] "SUTRI;DHAKA;CHANDRA;BASIN;HIMALAYA;STABLE-ISOTOPE;SPECIFIC;ABLATION;CHHOTA;SHIGRI;GLACIER;MELT;CONTRIBUTION;MASS-BALANCE;UPPER;GANGA;DEUTERIUM;CHORABARI;HIMACHAL;PRADESH;SOURCE;RIVER"
# [4] "RUNOFF-GENERATION;TRACER;FLOWPATH;UNCERTAINTY;THREE-COMPONENT;GROUNDWATER;CATCHMENT;CHEMISTRY;TWO-COMPONENT;DISCHARGE"
# [5] "ACCESSIBLE;RESIDENCE-TIME;QUALITY;CHLORIDE;CATCHMENT;VARIATION;TRANSPORT;DYNAMIC;PRECIPITATION;FOREST"
# [6] "GROUNDWATER;RECHARGE;MONSOON;GLACIER;CLIMATE-CHANGE;COMPONENT;TIANSHAN;MOUNTAIN;STABLE-ISOTOPE;SOIL;MOISTURE;BASIN;PALEOALTIMETRY;FRACTIONATION"
三、小结
赞 (0)