%Aigaion2 BibTeX export from Knowledge Engineering Publications
%Friday 17 December 2021 11:57:49 PM

% Conference paper: "Preference-Based Policy Iteration" (ECML PKDD 2011, Part I, pp. 312--327).
% NOTE(review): the url field below is a site-relative path, not an absolute URL;
% it presumably resolves against the exporting publications site -- confirm the base URL before use.
@INPROCEEDINGS{jf:ECML-PKDD-11,
     author = {Cheng, Weiwei and F{\"{u}}rnkranz, Johannes and H{\"{u}}llermeier, Eyke and Park, Sang-Hyeun},
     editor = {Gunopulos, Dimitrios and Hofmann, Thomas and Malerba, Donato and Vazirgiannis, Michalis},
      title = {Preference-Based Policy Iteration: Leveraging Preference Learning for Reinforcement Learning},
  booktitle = {Proceedings of the 22nd European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases ({ECML} {PKDD} 2011, Athens, Greece), Part I},
       year = {2011},
      pages = {312--327},
  publisher = {Springer},
        url = {/publications/papers/ECML-PKDD-11.pdf},
   abstract = {This paper makes a first step toward the integration of two subfields of
machine learning, namely preference learning and reinforcement learning
(RL). An important motivation for a ``preference-based'' approach to
reinforcement learning is a possible extension of the type of feedback
an agent may learn from. In particular, while conventional RL methods
are essentially confined to deal with numerical rewards, there are many
applications in which this type of information is not naturally
available, and in which only qualitative reward signals are provided
instead. Therefore, building on novel methods for preference learning,
our general goal is to equip the RL agent with qualitative policy
models, such as ranking functions that allow for sorting its available
actions from most to least promising, as well as algorithms for learning
such models from qualitative feedback. Concretely, in this paper, we
build on an existing method for approximate policy iteration based on
roll-outs. While this approach is based on the use of classification
methods for generalization and policy learning, we make use of a
specific type of preference learning method called label ranking.
Advantages of our preference-based policy iteration method are
illustrated by means of two case studies.}
}