@ARTICLE{Musial_Jakub_Self-improving_2021,
  author    = {Musial, Jakub and Stebel, Krzysztof and Czeczot, Jacek},
  title     = {Self-improving Q-learning based controller for a class of dynamical processes},
  journal   = {Archives of Control Sciences},
  volume    = {31},
  number    = {3},
  pages     = {527--551},
  year      = {2021},
  publisher = {Committee of Automatic Control and Robotics PAS},
  doi       = {10.24425/acs.2021.138691},
  url       = {http://www.czasopisma.pan.pl/Content/120823/art03.pdf},
  keywords  = {process control, Q-learning algorithm, reinforcement learning, intelligent control, on-line learning},
  abstract  = {This paper presents how the Q-learning algorithm can be applied as a general-purpose self-improving controller for industrial automation, as a substitute for a conventional PI controller implemented without proper tuning. The traditional Q-learning approach is redefined to better fit practical control loops, including a new definition of the goal state based on the closed-loop reference trajectory and a discretization of the state space and of the accessible actions (manipulated variables). The properties of the Q-learning algorithm are investigated in terms of practical applicability, with special emphasis on initializing the Q-matrix from preliminary PI tunings alone to ensure bumpless switching between the existing controller and the replacing Q-learning algorithm. A general approach to the design of the Q-matrix and the learning policy is suggested, and the concept is systematically validated by simulation in the control of two example processes exhibiting first-order dynamics and oscillatory second-order dynamics. Results show that online learning through interaction with the controlled process is possible and ensures a significant improvement in control performance compared to an arbitrarily tuned PI controller.},
}