@comment{Journal article: Li et al., "Learning deep representations of enzyme thermal adaptation", Protein Science 31(12), 2022. The note field carries the publisher's funding and copyright statements.}
@article{4fc83b283e434e64b3f1cbf1c9650630,
  author    = {Li, Gang and Buric, Filip and Zrimec, Jan and Viknander, Sandra and Nielsen, Jens and Zelezniak, Aleksej and Engqvist, Martin K. M.},
  title     = {Learning deep representations of enzyme thermal adaptation},
  journal   = {Protein Science},
  year      = {2022},
  month     = dec,
  volume    = {31},
  number    = {12},
  doi       = {10.1002/pro.4480},
  issn      = {0961-8368},
  publisher = {Wiley},
  language  = {English},
  keywords  = {bioinformatics, deep neural networks, enzyme catalytic temperatures, optimal growth temperatures, protein thermostability, transfer learning},
  abstract  = {Temperature is a fundamental environmental factor that shapes the evolution of organisms. Learning thermal determinants of protein sequences in evolution thus has profound significance for basic biology, drug discovery, and protein engineering. Here, we use a data set of over 3 million BRENDA enzymes labeled with optimal growth temperatures (OGTs) of their source organisms to train a deep neural network model (DeepET). The protein-temperature representations learned by DeepET provide a temperature-related statistical summary of protein sequences and capture structural properties that affect thermal stability. For prediction of enzyme optimal catalytic temperatures and protein melting temperatures via a transfer learning approach, our DeepET model outperforms classical regression models trained on rationally designed features and other deep-learning-based representations. DeepET thus holds promise for understanding enzyme thermal adaptation and guiding the engineering of thermostable enzymes.},
  note      = {Funding Information: GL and JN have received funding from the European Union's Horizon 2020 research and innovation program under the Marie Sk{\l}odowska-Curie program, project PAcMEN (Grant agreement no. 722287). JN also acknowledges funding from the Novo Nordisk Foundation (Grant no. NNF10CC1016517), the Knut and Alice Wallenberg Foundation. The study was supported by SciLifeLab funding and the Swedish Research Council (Vetenskapsr{\aa}det) starting Grant no. 2019-05356. AZ was supported by the Marius Jakulis Jason Foundation and JZ by the Slovenian Research Agency (ARRS) Grant no. J2-3060 and Public Scholarship, Development, Disability and Maintenance Fund of the Republic of Slovenia Grant no. 11013-9/2021-2. The computations were performed on resources at Chalmers Center for Computational Science and Engineering (C3SE) provided by the Swedish National Infrastructure for Computing (SNIC). Mikael {\"O}hman and Thomas Svedberg at C3SE are acknowledged for technical assistance. Publisher Copyright: {\textcopyright} 2022 The Authors. Protein Science published by Wiley Periodicals LLC on behalf of The Protein Society.},
}