From 9fbb46a9866edbf89c791f890f21f3c1601f4d2f Mon Sep 17 00:00:00 2001 From: massimoaria Date: Fri, 16 Jun 2023 22:06:47 +0200 Subject: [PATCH] issue #361: Solved. Now convert2df works with the new Dimensions CSV format --- R/dimensions2df.R | 27 +++++++++++++++++++++++++-- R/missingData.R | 4 ++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/R/dimensions2df.R b/R/dimensions2df.R index d82fe871..b5b43766 100644 --- a/R/dimensions2df.R +++ b/R/dimensions2df.R @@ -69,6 +69,20 @@ dimensions2df <- function(file, format = "csv") { } } names(DATA) = fields + ind <- which(names(DATA)=="Source title/Anthology title") + if (length(ind)==1){ + names(DATA)[ind] <- "SO" + } + ind <- which(names(DATA)=="Authors Affiliations Name of Research organization" ) + if (length(ind)==1){ + names(DATA)[ind] <- "C1" + DATA$AU_UN <- DATA$C1 + } + ind <- which(names(DATA)== "Authors Affiliations Country of Research organization") + if (length(ind)==1){ + names(DATA)[ind] <- "AU_CO" + } + DATA <- postprocessingDim(DATA) @@ -82,7 +96,7 @@ postprocessingDim <- function(DATA) { ## Converting original references in WOS format (AU, PY, SO, VOL, NUM, DOI) if ("Cited references" %in% names(DATA)) { - aaa <- strsplit(DATA$Cited.references, ";\\[") + aaa <- strsplit(DATA$`Cited references`, ";\\[") cr <- (unlist(lapply(aaa, function(l) { l <- gsub("\\|", "!!!", l) l <- strsplit(l, "!!!") @@ -235,7 +249,16 @@ postprocessingDim <- function(DATA) { DATA$DB <- "DIMENSIONS" - DATA <- metaTagExtraction(DATA, "AU_CO") + if (!"AU_CO" %in% names(DATA)) DATA <- metaTagExtraction(DATA, "AU_CO") + + DATA$AU1_CO <- unlist(lapply(strsplit(DATA$AU_CO,";"), function(l){ + if (length(l)>0){ + l <- l[1] + } else { + l <- "NA" + } + return(l) + })) DATA <- metaTagExtraction(DATA, "AU1_CO") return(DATA) diff --git a/R/missingData.R b/R/missingData.R index 27239af6..5c6697c4 100644 --- a/R/missingData.R +++ b/R/missingData.R @@ -37,13 +37,13 @@ missingData <- function(M) { # create a vector with the tags 
tag <- unlist( strsplit( - "AB,AU,C1,CR,DE,DI,DT,ID,LA,NR,PY,RP,SO,TC,TI,WC","," + "AB,AU,C1,CR,DE,DI,DT,ID,LA,PY,RP,SO,TC,TI,WC","," ) ) # create a vector with the description of the tags description <- trimws(unlist( strsplit( - "Abstract, Author,Affiliation,Cited References,Keywords,DOI,Document Type,Keywords Plus,Language,Number of Cited References, + "Abstract, Author,Affiliation,Cited References,Keywords,DOI,Document Type,Keywords Plus,Language, Publication Year,Corresponding Author, Journal, Total Citation, Title, Science Categories", "," ) ))