# Imports and Login
import ast
import json

import dimcli
import pandas as pd
from clj import mapcat
from funcy import chunks
# Log in through dimcli and create the DSL client that the queries further
# down are sent through.
dimcli.login()
dsl = dimcli.Dsl()

# An interesting question that arises for preprints is whether they are later
# published in a peer-reviewed journal. Dimensions enriches the metadata of
# preprints with the DOI of the resulting publication.
import dimcli
import pandas as pd
from funcy import chunks
from clj import mapcat
import json
# NOTE(review): this login / client setup repeats the one performed earlier in
# the file -- presumably a duplicated notebook cell; confirm before removing.
dimcli.login()
dsl = dimcli.Dsl()

# Load the preprint metadata and keep only the DOIs of preprints for which a
# resulting publication is recorded (rows with a missing DOI are dropped).
# NOTE(review): INPUT_FILE is not defined in the visible part of this file;
# it has to be set before this point.
preprints_df = pd.read_csv(INPUT_FILE)
resulting_publication_dois = (
    preprints_df["resulting_publication_doi"].dropna().tolist()
)

# The additional metadata for the resulting publication are queried below
# using their DOIs.
# Fetch the metadata of every resulting publication, sending the DOIs to the
# DSL client in batches of CHUNK_SIZE per request.
CHUNK_SIZE = 400
data = []
# The "{}" placeholder is filled with a JSON array of DOIs for each batch.
q = "search publications where doi in {} return publications [id + doi + title + year + journal + altmetric + times_cited + recent_citations + type + issn + linkout]"
# The result limit is twice the batch size.
# NOTE(review): presumably headroom for DOIs matching several records -- confirm.
q = q + f" limit {CHUNK_SIZE*2}"
for dois in chunks(CHUNK_SIZE, resulting_publication_dois):
    results = dsl.query(q.format(json.dumps(dois)))
    data.append(results)

# Concatenate the "publications" records of all batches into one table.
resulting_publications_df = pd.DataFrame(mapcat(lambda x: x["publications"], data))


def _as_journal_record(value):
    """Return a journal cell as a dict, or an empty dict when it is missing.

    Dict values are passed through unchanged. Non-empty strings are parsed
    with ``ast.literal_eval`` (which only accepts Python literals, unlike
    ``eval``) so stringified dicts are still supported; anything that does
    not yield a dict (None, NaN, empty values) becomes ``{}``, which
    ``pd.json_normalize`` turns into a row of missing values.

    Raises:
        ValueError, SyntaxError: if a string value is not a valid literal.
    """
    if isinstance(value, dict):
        return value
    if isinstance(value, str) and value.strip():
        parsed = ast.literal_eval(value)
        return parsed if isinstance(parsed, dict) else {}
    return {}


# Expand the nested "journal" field into flat "journal.<key>" columns.
# The column is absent when no returned record carried a journal (or when
# nothing was returned at all), in which case there is nothing to expand.
if "journal" in resulting_publications_df.columns:
    journal_df = pd.json_normalize(
        [_as_journal_record(j) for j in resulting_publications_df["journal"]]
    ).add_prefix("journal.")
    # Align explicitly so the join below matches rows one-to-one.
    journal_df.index = resulting_publications_df.index
    resulting_publications_df = (
        resulting_publications_df
        .join(journal_df)
        .drop(columns="journal")
    )

# Write the enriched table without the DataFrame index.
# NOTE(review): OUTPUT_FILE is not defined in the visible part of this file.
resulting_publications_df.to_csv(OUTPUT_FILE, index=False)