% Carr et al. (2021), BMC Bioinformatics 22(1): Mapper-based clustering pipeline.
% The funding/copyright text from the export lives in the non-printing
% `internal-note` field so that it is not rendered in the bibliography.
@article{d9e5c60051e648d19e0ca53719d3a415,
  author        = {Carr, Ewan and Carri{\`e}re, Mathieu and Michel, Bertrand and Chazal, Fr{\'e}d{\'e}ric and Iniesta, Raquel},
  title         = {Identifying homogeneous subgroups of patients and important features: a topological machine learning approach},
  journal       = {BMC Bioinformatics},
  year          = {2021},
  month         = dec,
  volume        = {22},
  number        = {1},
  doi           = {10.1186/s12859-021-04360-9},
  issn          = {1471-2105},
  publisher     = {BioMed Central},
  language      = {English},
  keywords      = {Clustering, Machine learning, Topological data analysis},
  abstract      = {Background: This paper exploits recent developments in topological data analysis to present a pipeline for clustering based on Mapper, an algorithm that reduces complex data into a one-dimensional graph. Results: We present a pipeline to identify and summarise clusters based on statistically significant topological features from a point cloud using Mapper. Conclusions: Key strengths of this pipeline include the integration of prior knowledge to inform the clustering process and the selection of optimal clusters; the use of the bootstrap to restrict the search to robust topological features; the use of machine learning to inspect clusters; and the ability to incorporate mixed data types. Our pipeline can be downloaded under the GNU GPLv3 license at https://github.com/kcl-bhi/mapper-pipeline.},
  internal-note = {Funding Information: This work has been supported by the Brain and Behavior Foundation awarded to Raquel Iniesta (Award number 26338). The funding body played no role in the design of the study, the collection, analysis, interpretation of data, or in writing the manuscript. This paper represents independent research part-funded by the National Institute for Health Research (NIHR) Maudsley Biomedical Research Centre at South London and Maudsley NHS Foundation Trust and King{\textquoteright}s College London. The views expressed are those of the author(s) and not necessarily those of the NHS, the NIHR or the Department of Health and Social Care. Publisher Copyright: {\textcopyright} 2021, The Author(s). Copyright: Copyright 2021 Elsevier B.V., All rights reserved.},
}