% Conference paper published in the ICCS 2023 proceedings (Lecture Notes in
% Computer Science, Part II). Cleaned up from a repository export:
%   - author/editor names are in "Last, First" form without escaped braces,
%     which would otherwise be typeset as literal { } characters;
%   - the note keeps only the publisher copyright and the conference details;
%     funding acknowledgements of unrelated papers in the volume were removed;
%   - url holds a single address; the conference website lives in the note;
%   - doi is stored bare (no LaTeX escaping).
@inproceedings{a2ccfe41c42b4615977ab638e0af7d54,
title = "Solving complex sequential decision-making problems by deep reinforcement learning with heuristic rules",
abstract = "Deep reinforcement learning (RL) has demonstrated great capabilities in dealing with sequential decision-making problems, but its performance is often bounded by suboptimal solutions in many complex applications. This paper proposes the use of human expertise to increase the performance of deep RL methods. Human domain knowledge is characterized by heuristic rules and they are utilized adaptively to alter either the reward signals or environment states during the learning process of deep RL. This prevents deep RL methods from being trapped in local optimal solutions and computationally expensive training process and thus allowing them to maximize their performance when carrying out designated tasks. The proposed approach is experimented with a video game developed using the Arcade Learning Environment. With the extra information provided at the right time by human experts via heuristic rules, deep RL methods show greater performance compared with circumstances where human knowledge is not used. This implies that our approach of utilizing human expertise for deep RL has helped to increase the performance of deep RL and it has a great potential to be generalized and applied to solve complex real-world decision-making problems efficiently.",
keywords = "Complex problems, Heuristic rules, Human expertise, Reinforcement learning, Sequential decision making",
author = "Nguyen, Thanh Thi and Nguyen, Cuong M. and Huynh-The, Thien and Pham, Quoc Viet and Nguyen, Quoc Viet Hung and Razzak, Imran and Reddi, Vijay Janapa",
note = "Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG. International Conference on Computational Science, ICCS 2023; conference date: 3--5 July 2023; conference website: https://www.iccs-meeting.org/iccs2023/",
year = "2023",
doi = "10.1007/978-3-031-36021-3_30",
language = "English",
isbn = "9783031360206",
series = "Lecture Notes in Computer Science",
publisher = "Springer",
pages = "298--305",
editor = "Miky{\v s}ka, Ji{\v r}{\'i} and de Mulatier, Cl{\'e}lia and Paszynski, Maciej and Krzhizhanovskaya, Valeria V. and Dongarra, Jack J. and Sloot, Peter M. A.",
booktitle = "Computational Science -- {ICCS} 2023: 23rd International Conference, Prague, Czech Republic, July 3--5, 2023, Proceedings, Part {II}",
address = "Cham",
url = "https://link.springer.com/book/10.1007/978-3-031-36021-3",
}