% Workshop paper from the FinNLP-KDF-ECONLP 2024 proceedings (held at LREC-COLING 2024).
% Typed as inproceedings so that title is the paper title and booktitle is the
% proceedings title. The citation key is unchanged from the original export.
@inproceedings{49ee6a589a7a4991b1af035567f13c3a,
  author    = {Alnajjar, Saif and Wang, Xinyu and He, Yulan},
  title     = {Topic Taxonomy Construction from {ESG} Reports},
  booktitle = {Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services and the 4th Economics and Natural Language Processing, {FinNLP-KDF-ECONLP} 2024 at {LREC-COLING} 2024 - Workshop Proceedings},
  editor    = {Chen, Chung-Chi and Ma, Zhiqiang and Hahn, Udo},
  pages     = {178--187},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2024},
  language  = {English},
  keywords  = {Document Classification, Knowledge Discovery/Representation, Text Analytics, Text categorisation, Text Mining, Topic Detection, Tracking},
  abstract  = {The surge in Environmental, Societal, and Governance (ESG) reports, essential for corporate transparency and modern investments, presents a challenge for investors due to their varying lengths and sheer volume. We present a novel methodology, called MultiTaxoGen, for creating topic taxonomies designed specifically for analysing the ESG reports. Topic taxonomies serve to illustrate topics covered in a corpus of ESG reports while also highlighting the hierarchical relationships between them. Unfortunately, current state-of-the-art approaches for constructing topic taxonomies are designed for more general datasets, resulting in ambiguous topics and the omission of many latent topics presented in ESG-focused corpora. This makes them unsuitable for the specificity required by investors. Our method instead adapts topic modelling techniques by employing them recursively on each topic{\textquoteright}s local neighbourhood, the subcorpus of documents assigned to that topic. This iterative approach allows us to identify the children topics and offers a better understanding of topic hierarchies in a fine-grained paradigm. Our findings reveal that our method captures more latent topics in our ESG report corpus than the leading method and provides more coherent topics with comparable relational accuracy.},
  note      = {Publisher Copyright: {\textcopyright} 2024 ELRA Language Resource Association.; Joint Workshop of the 7th Financial Technology and Natural Language Processing, 5th Knowledge Discovery from Unstructured Data in Financial Services and 4th Economics and Natural Language Processing, FinNLP-KDF-ECONLP 2024 ; Conference date: 20-05-2024},
}