@inbook{1d715954a034484585fdb310f3cc03e7,
title = "CWTM: Leveraging Contextualized Word Embeddings from BERT for Neural Topic Modeling",
abstract = "Most existing topic models rely on bag-of-words (BOW) representation, which limits their ability to capture word order information and leads to challenges with out-of-vocabulary (OOV) words in new documents. Contextualized word embeddings, however, show superiority in word sense disambiguation and effectively address the OOV issue. In this work, we introduce a novel neural topic model called the Contextlized Word Topic Model (CWTM), which integrates contextualized word embeddings from BERT. The model is capable of learning the topic vector of a document without BOW information. In addition, it can also derive the topic vectors for individual words within a document based on their contextualized word embeddings. Experiments across various datasets show that CWTM generates more coherent and meaningful topics compared to existing topic models, while also accommodating unseen words in newly encountered documents.",
keywords = "BERT, Contextualized Word Embeddings, Topic Modeling",
author = "Zheng Fang and Yulan He and Rob Procter",
note = "Publisher Copyright: {\textcopyright} 2024 ELRA Language Resource Association: CC BY-NC 4.0.; Joint 30th International Conference on Computational Linguistics and 14th International Conference on Language Resources and Evaluation, LREC-COLING 2024 ; Conference date: 20-05-2024 Through 25-05-2024",
year = "2024",
language = "English",
series = "2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC-COLING 2024 - Main Conference Proceedings",
publisher = "European Language Resources Association (ELRA)",
pages = "4273--4286",
editor = "Nicoletta Calzolari and Min-Yen Kan and Veronique Hoste and Alessandro Lenci and Sakriani Sakti and Nianwen Xue",
booktitle = "2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, LREC-COLING 2024 - Main Conference Proceedings",
}