list.bib

@inproceedings{Hammond2021equilibrium,
  abstract = {Multi-agent influence diagrams (MAIDs) are a popular form of graphical model that, for certain classes of games, have been shown to offer key complexity and explainability advantages over traditional extensive form game (EFG) representations. In this paper, we extend previous work on MAIDs by introducing the concept of a MAID subgame, as well as subgame perfect and trembling hand perfect equilibrium refinements. We then prove several equivalence results between MAIDs and EFGs. Finally, we describe an open source implementation for reasoning about MAIDs and computing their equilibria.},
  archiveprefix = {arXiv},
  arxivid = {2102.05008},
  author = {Hammond, Lewis and Fox, James and Everitt, Tom and Abate, Alessandro and Wooldridge, Michael},
  booktitle = {AAMAS},
  eprint = {2102.05008},
  title = {Equilibrium Refinements for Multi-Agent Influence Diagrams: Theory and Practice},
  year = {2021}
}

@inproceedings{Everitt2021agent,
  abstract = {We present a framework for analysing agent incentives using causal influence diagrams. We establish that a well-known criterion for value of information is complete. We propose a new graphical criterion for value of control, establishing its soundness and completeness. We also introduce two new concepts for incentive analysis: response incentives indicate which changes in the environment affect an optimal decision, while instrumental control incentives establish whether an agent can influence its utility via a variable X. For both new concepts, we provide sound and complete graphical criteria. We show by example how these results can help with evaluating the safety and fairness of an AI system.},
  archiveprefix = {arXiv},
  arxivid = {2102.01685},
  author = {Everitt, Tom and Carey, Ryan and Langlois, Eric and Ortega, Pedro A and Legg, Shane},
  booktitle = {AAAI},
  eprint = {2102.01685},
  title = {Agent Incentives: A Causal Perspective},
  year = {2021}
}

@inproceedings{Langlois2021,
  abstract = {Reinforcement learning in complex environments may require supervision to prevent the agent from attempting dangerous actions. As a result of supervisor intervention, the executed action may differ from the action specified by the policy. How does this affect learning? We present the Modified-Action Markov Decision Process, an extension of the MDP model that allows actions to differ from the policy. We analyze the asymptotic behaviours of common reinforcement learning algorithms in this setting and show that they adapt in different ways: some completely ignore modifications while others go to various lengths in trying to avoid action modifications that decrease reward. By choosing the right algorithm, developers can prevent their agents from learning to circumvent interruptions or constraints, and better control agent responses to other kinds of action modification, like self-damage.},
  archiveprefix = {arXiv},
  arxivid = {2102.07716},
  author = {Langlois, Eric and Everitt, Tom},
  booktitle = {AAAI},
  eprint = {2102.07716},
  title = {How {RL} Agents Behave When Their Actions Are Modified},
  year = {2021}
}

@article{Everitt2021tampering,
  abstract = {Can humans get arbitrarily capable reinforcement learning (RL) agents to do their bidding? Or will sufficiently capable RL agents always find ways to bypass their intended objectives by shortcutting their reward signal? This question impacts how far RL can be scaled, and whether alternative paradigms must be developed in order to build safe artificial general intelligence. In this paper, we study when an RL agent has an instrumental goal to tamper with its reward process, and describe design principles that prevent instrumental goals for two different types of reward tampering (reward function tampering and RF-input tampering). Combined, the design principles can prevent both types of reward tampering from being instrumental goals. The analysis benefits from causal influence diagrams to provide intuitive yet precise formalizations.},
  archiveprefix = {arXiv},
  arxivid = {1908.04734},
  author = {Everitt, Tom and Hutter, Marcus and Kumar, Ramana and Krakovna, Victoria},
  eprint = {1908.04734},
  journal = {Synthese},
  keywords = {AI Safety,Causality,Delusion box,Influence diagrams,Reinforcement learning,Reward modeling,Wireheading},
  title = {Reward tampering problems and solutions in reinforcement learning: A causal influence diagram perspective},
  year = {2021}
}

@misc{Kumar2020REALab,
  abstract = {This paper describes REALab, a platform for embedded agency research in reinforcement learning (RL). REALab is designed to model the structure of tampering problems that may arise in real-world deployments of RL. Standard Markov Decision Process (MDP) formulations of RL and simulated environments mirroring the MDP structure assume secure access to feedback (e.g., rewards). This may be unrealistic in settings where agents are embedded and can corrupt the processes producing feedback (e.g., human supervisors, or an implemented reward function). We describe an alternative Corrupt Feedback MDP formulation and the REALab environment platform, which both avoid the secure feedback assumption. We hope the design of REALab provides a useful perspective on tampering problems, and that the platform may serve as a unit test for the presence of tampering incentives in RL agent designs.},
  archiveprefix = {arXiv},
  arxivid = {2011.08820},
  author = {Kumar, Ramana and Uesato, Jonathan and Ngo, Richard and Everitt, Tom and Krakovna, Victoria and Legg, Shane},
  eprint = {2011.08820},
  title = {{REAL}ab: An Embedded Perspective on Tampering},
  year = {2020}
}

@misc{Uesato2020decoupled,
  abstract = {How can we design agents that pursue a given objective when all feedback mechanisms are influenceable by the agent? Standard RL algorithms assume a secure reward function, and can thus perform poorly in settings where agents can tamper with the reward-generating mechanism. We present a principled solution to the problem of learning from influenceable feedback, which combines approval with a decoupled feedback collection procedure. For a natural class of corruption functions, decoupled approval algorithms have aligned incentives both at convergence and for their local updates. Empirically, they also scale to complex 3D environments where tampering is possible.},
  archiveprefix = {arXiv},
  arxivid = {2011.08827},
  author = {Uesato, Jonathan and Kumar, Ramana and Krakovna, Victoria and Everitt, Tom and Ngo, Richard and Legg, Shane},
  eprint = {2011.08827},
  title = {Avoiding Tampering Incentives in Deep {RL} via Decoupled Approval},
  year = {2020}
}

@misc{Carey2019shape,
  title = {The incentives that shape behavior},
  author = {Carey, Ryan and Langlois, Eric and Everitt, Tom and Legg, Shane},
  year = {2020},
  eprint = {2001.07118},
  archiveprefix = {arXiv},
  arxivid = {2001.07118},
  abstract = {Which variables does an agent have an incentive to control with its decision, and which variables does it have an incentive to respond to? We formalise these incentives, and demonstrate unique graphical criteria for detecting them in any single decision causal influence diagram. To this end, we introduce structural causal influence models, a hybrid of the influence diagram and structural causal model frameworks. Finally, we illustrate how these incentives predict agent incentives in both fairness and AI safety applications.},
  note = {presented at the SafeAI AAAI workshop}
}

@inproceedings{Everitt2019modeling,
  archiveprefix = {arXiv},
  arxivid = {1906.08663},
  author = {Everitt, Tom and Kumar, Ramana and Krakovna, Victoria and Legg, Shane},
  booktitle = {IJCAI AI Safety Workshop},
  eprint = {1906.08663},
  title = {Modeling {AGI} Safety Frameworks with Causal Influence Diagrams},
  year = {2019},
  abstract = {Proposals for safe AGI systems are typically made at the level of frameworks, specifying how the components of the proposed system should be trained and interact with each other. In this paper, we model and compare the most promising AGI safety frameworks using causal influence diagrams. The diagrams show the optimization objective and causal assumptions of the framework. The unified representation permits easy comparison of frameworks and their assumptions. We hope that the diagrams will serve as an accessible and visual introduction to the main AGI safety frameworks.}
}

@misc{Everitt2019incentives1,
  title = {Understanding Agent Incentives using Causal Influence Diagrams. {Part I}: {S}ingle Action Settings},
  author = {Everitt, Tom and Ortega, Pedro A and Barnes, Elizabeth and Legg, Shane},
  archiveprefix = {arXiv},
  eprint = {1902.09980},
  primaryclass = {cs.AI},
  year = {2019},
  abstract = {Agents are systems that optimize an objective function in an environment. Together, the goal and the environment induce secondary objectives, incentives. Modeling the agent-environment interaction in graphical models called influence diagrams, we can answer two fundamental questions about an agent's incentives directly from the graph: (1) which nodes is the agent incentivized to observe, and (2) which nodes is the agent incentivized to influence? The answers tell us which information and influence points need extra protection. For example, we may want a classifier for job applications to not use the ethnicity of the candidate, and a reinforcement learning agent not to take direct control of its reward mechanism. Different algorithms and training paradigms can lead to different influence diagrams, so our method can be used to identify algorithms with problematic incentives and help in designing algorithms with better incentives.}
}

@misc{Leike2018alignment,
  title = {Scalable agent alignment via reward modeling: a research direction},
  author = {Leike, Jan and Krueger, David and Everitt, Tom and Martic, Miljan and
  Maini, Vishal and Legg, Shane},
  archiveprefix = {arXiv},
  eprint = {1811.07871},
  primaryclass = {cs.LG},
  year = {2018},
  abstract = {One obstacle to applying reinforcement learning algorithms to real-world problems is the lack of suitable reward functions. Designing such reward functions is difficult in part because the user only has an implicit understanding of the task objective. This gives rise to the agent alignment problem: how do we create agents that behave in accordance with the user's intentions? We outline a high-level research direction to solve the agent alignment problem centered around reward modeling: learning a reward function from interaction with the user and optimizing the learned reward function with reinforcement learning. We discuss the key challenges we expect to face when scaling reward modeling to complex and general domains, concrete approaches to mitigate these challenges, and ways to establish trust in the resulting agents.}
}

@phdthesis{Everitt2018thesis,
title = {Towards Safe Artificial General Intelligence},
author = {Tom Everitt},
school = {Australian National University},
year = {2018},
month = {May},
url = {http://hdl.handle.net/1885/164227},
abstract = {The field of artificial intelligence has recently experienced a number of breakthroughs thanks to progress in deep learning and reinforcement learning. Computer algorithms now outperform humans at Go, Jeopardy, image classification, and lip reading, and are becoming very competent at driving cars and interpreting natural language. The rapid development has led many to conjecture that artificial intelligence with greater-than-human ability on a wide range of tasks may not be far. This in turn raises concerns whether we know how to control such systems, in case we were to successfully build them.

Indeed, if humanity would find itself in conflict with a system of much greater intelligence than itself, then human society would likely lose. One way to make sure we avoid such a conflict is to ensure that any future AI system with potentially greater-than-human-intelligence has goals that are aligned with the goals of the rest of humanity. For example, it should not wish to kill humans or steal their resources.

The main focus of this thesis will therefore be goal alignment, i.e. how to design artificially intelligent agents with goals coinciding with the goals of their designers. Focus will mainly be directed towards variants of reinforcement learning, as reinforcement learning currently seems to be the most promising path towards powerful artificial intelligence. We identify and categorize goal misalignment problems in reinforcement learning agents as designed today, and give examples of how these agents may cause catastrophes in the future. We also suggest a number of reasonably modest modifications that can be used to avoid or mitigate each identified misalignment problem. Finally, we also study various choices of decision algorithms, and conditions for when a powerful reinforcement learning system will permit us to shut it down.

The central conclusion is that while reinforcement learning systems as designed today are inherently unsafe to scale to human levels of intelligence, there are ways to potentially address many of these issues without straying too far from the currently so successful reinforcement learning paradigm. Much work remains in turning the high-level proposals suggested in this thesis into practical algorithms, however.
}
}

@techreport{Everitt2018alignment,
  title = {The Alignment Problem for {B}ayesian History-Based Reinforcement Learners},
  author = {Everitt, Tom and Hutter, Marcus},
  year = {2018},
  abstract = {Value alignment is often considered a critical component of safe artificial intelligence. Meanwhile, reinforcement learning is often criticized as being inherently unsafe and misaligned, for reasons such as wireheading, delusion boxes, misspecified reward functions and distributional shifts. In this report, we categorize sources of misalignment for reinforcement learning agents, illustrating each type with numerous examples. For each type of problem, we also describe ways to remove the source of misalignment. Combined, the suggestions form high-level blueprints for how to design value aligned RL agents.},
  url = {http://www.tomeveritt.se/papers/alignment.pdf},
  note = {Winner of the AI Alignment Prize.}
}

@inproceedings{Everitt2018litrev,
  title = {{AGI} Safety Literature Review},
  author = {Everitt, Tom and Lea, Gary and Hutter, Marcus},
  year = {2018},
  booktitle = {International Joint Conference on AI (IJCAI)},
  archiveprefix = {arXiv},
  eprint = {1805.01109},
  primaryclass = {cs.LG},
  abstract = {The development of Artificial General Intelligence (AGI) promises to be a major event. Along with its many potential benefits, it also raises serious safety concerns (Bostrom, 2014). The intention of this paper is to provide an easily accessible and up-to-date collection of references for the emerging field of AGI safety. A significant number of safety problems for AGI have been identified. We list these, and survey recent research on solving them. We also cover works on how best to think of AGI from the limited knowledge we have today, predictions for when AGI will first be created, and what will happen after its creation. Finally, we review the current public policy on AGI.},
  url = {https://www.ijcai.org/proceedings/2018/768}
}

@incollection{Everitt2018bc,
  author = {Everitt, Tom and Hutter, Marcus},
  booktitle = {Foundations of Trusted Autonomy},
  series = {Studies in Systems, Decision and Control},
  chapter = 2,
  editor = {Abbass, Hussein A. and Scholz, Jason and Reid, Darryn J.},
  pages = {15--46},
  publisher = {Springer},
  title = {Universal Artificial Intelligence: Practical Agents and Fundamental Challengs},
  url = {https://link.springer.com/chapter/10.1007/978-3-319-64816-3_2},
  year = 2018,
  isbn = {978-3-319-64816-3},
  doi = {10.1007/978-3-319-64816-3},
  abstract = {Foundational theories have contributed greatly to scientific progress in many fields. Examples include Zermelo-Fraenkel set theory in mathematics, and universal Turing machines in computer science. Universal Artificial Intelligence (UAI) is an increasingly well-studied foundational theory for artificial intelligence, based on ancient principles in the philosophy of science and modern developments in information and probability theory. Importantly, it refrains from making unrealistic Markov, ergodicity, or stationarity assumptions on the environment. UAI provides a theoretically optimal agent AIXI and principled ideas for constructing practical autonomous agents. The theory also makes it possible to establish formal results on the motivations of AI systems. Such results may greatly enhance the trustability of autonomous agents, and guide design choices towards more robust agent architectures and incentive schemes. Finally, UAI offers a deeper appreciation of fundamental problems such as the induction problem and the exploration-exploitation dilemma.}
}

@article{Leike2017gw,
  author = {{Leike}, Jan and {Martic}, Miljan and {Krakovna}, Victoria and {Ortega}, Pedro and 
	{Everitt}, Tom and {Lefrancq}, Andrew and {Orseau}, Laurent and {Legg}, Shane
	},
  title = {{AI Safety Gridworlds}},
  journal = {ArXiv e-prints},
  archiveprefix = {arXiv},
  eprint = {1711.09883},
  primaryclass = {cs.LG},
  keywords = {Computer Science - Learning, Computer Science - Artificial Intelligence},
  year = 2017,
  month = nov,
  abstract = {We present a suite of reinforcement learning environments illustrating various safety properties of intelligent agents. These problems include safe interruptibility, avoiding side effects, absent supervisor, reward gaming, safe exploration, as well as robustness to self-modification, distributional shift, and adversaries. To measure compliance with the intended safe behavior, we equip each environment with a performance function that is hidden from the agent. This allows us to categorize AI safety problems into robustness and specification problems, depending on whether the performance function corresponds to the observed reward function. We evaluate A2C and Rainbow, two recent deep reinforcement learning agents, on our environments and show that they are not able to solve them satisfactorily.}
}

@inproceedings{Everitt2017rc,
  author = {Everitt, Tom and Krakovna, Victoria and Orseau, Laurent and Hutter, Marcus and Legg, Shane},
  title = {Reinforcement Learning with a Corrupted Reward Signal},
  booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on
               Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
               19-26, 2017},
  pages = {4705--4713},
  year = {2017},
  doi = {10.24963/ijcai.2017/656},
  url = {	https://doi.org/10.24963/ijcai.2017/656},
  abstract = {No real-world reward function is perfect. Sensory errors and software bugs may result in agents getting higher (or lower) rewards than they should. For example, a reinforcement learning agent may prefer states where a sensory error gives it the maximum reward, but where the true reward is actually small. We formalise this problem as a generalised Markov Decision Problem called Corrupt Reward MDP. Traditional RL methods fare poorly in CRMDPs, even under strong simplifying assumptions and when trying to compensate for the possibly corrupt rewards. Two ways around the problem are investigated. First, by giving the agent richer data, such as in inverse reinforcement learning and semi-supervised reinforcement learning, reward corruption stemming from systematic sensory errors may sometimes be completely managed. Second, by using randomisation to blunt the agent's optimisation, reward corruption can be partially managed under some assumptions.},
  archiveprefix = {arXiv},
  eprint = {1705.08417},
  primaryclass = {cs.AI},
  blog = {http://www.tomeveritt.se/paper/2017/05/29/reinforcement-learning-with-corrupted-reward-channel.html}
}

@inproceedings{Martin2017,
  author = {Martin, Jarryd and Narayanan S, Suraj and Everitt, Tom and Hutter, Marcus},
  title = {Count-Based Exploration in Feature Space for Reinforcement Learning},
  booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on
               Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
               19-26, 2017},
  pages = {2471--2478},
  year = {2017},
  doi = {10.24963/ijcai.2017/344},
  url = {	https://doi.org/10.24963/ijcai.2017/344},
  archiveprefix = {arXiv},
  eprint = {1706.08090},
  primaryclass = {cs.AI},
  url = {http://tomeveritt.se/papers/IJCAI-17-count-based-exploration.pdf},
  abstract = {We introduce a new count-based optimistic exploration algorithm for Reinforcement Learning (RL) that is feasible in environments with high-dimensional state-action spaces. The success of RL algorithms in these domains depends crucially on generalisation from limited training experience. Function approximation techniques enable RL agents to generalise in order to estimate the value of unvisited states, but at present few methods enable generalisation regarding uncertainty. This has prevented the combination of scalable RL algorithms with efficient exploration strategies that drive the agent to reduce its uncertainty. We present a new method for computing a generalised state visit-count, which allows the agent to estimate the uncertainty associated with any state. Our phi-pseudocount achieves generalisation by exploiting same feature representation of the state space that is used for value function approximation. States that have less frequently observed features are deemed more uncertain. The phi-Exploration-Bonus algorithm rewards the agent for exploring in feature space rather than in the untransformed state space. The method is simpler and less computationally expensive than some previous proposals, and achieves near state-of-the-art results on high-dimensional RL benchmarks.}
}

@inproceedings{Wangberg2017,
  author = {W{\"a}ngberg, Tobias
and B{\"o}{\"o}rs, Mikael
and Catt, Elliot
and Everitt, Tom
and Hutter, Marcus},
  editor = {Everitt, Tom
and Goertzel, Ben
and Potapov, Alexey},
  title = {A Game-Theoretic Analysis of the Off-Switch Game},
  booktitle = {Artificial General Intelligence: 10th International Conference, AGI 2017, Melbourne, VIC, Australia, August 15-18, 2017, Proceedings},
  year = {2017},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {167--177},
  abstract = {The off-switch game is a game theoretic model of a highly intelligent robot interacting with a human. In the original paper by Hadfield-Menell et al. (2016b), the analysis is not fully game-theoretic as the human is modelled as an irrational player, and the robot's best action is only calculated under unrealistic normality and soft-max assumptions. In this paper, we make the analysis fully game theoretic, by modelling the human as a rational player with a random utility function. As a consequence, we are able to easily calculate the robot's best action for arbitrary belief and irrationality assumptions.},
  isbn = {978-3-319-63703-7},
  doi = {10.1007/978-3-319-63703-7_16},
  url = {https://doi.org/10.1007/978-3-319-63703-7_16},
  archiveprefix = {arXiv},
  eprint = {1708.03871},
  primaryclass = {cs.GT}
}

@book{Everitt2017agi,
  editor = {Everitt, Tom
and Goertzel, Ben
and Potapov, Alexey},
  title = {Artificial General Intelligence: 10th International Conference, AGI 2017, Melbourne, VIC, Australia, August 15-18, 2017, Proceedings},
  year = {2017},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {275},
  isbn = {978-3-319-63702-0},
  doi = {10.1007/978-3-319-63703-7},
  url = {https://link.springer.com/book/10.1007/978-3-319-63703-7}
}

@inproceedings{Everitt2016sm,
  author = {Everitt, Tom
and Filan, Daniel
and Daswani, Mayank
and Hutter, Marcus},
  editor = {Steunebrink, Bas
and Wang, Pei
and Goertzel, Ben},
  title = {Self-Modification of Policy and Utility Function in Rational Agents},
  booktitle = {Artificial General Intelligence: 9th International Conference, AGI 2016, New York, NY, USA, July 16-19, 2016, Proceedings},
  year = 2016,
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {1--11},
  isbn = {978-3-319-41649-6},
  doi = {10.1007/978-3-319-41649-6_1},
  archiveprefix = {arXiv},
  eprint = {1605.03142},
  primaryclass = {cs.AI},
  abstract = {Any agent that is part of the environment it interacts with and has versatile actuators (such as arms and fingers), will in principle have the ability to self-modify -- for example by changing its own source code. As we continue to create more and more intelligent agents, chances increase that they will learn about this ability. The question is: will they want to use it? For example, highly intelligent systems may find ways to change their goals to something more easily achievable, thereby ``escaping'' the control of their creators. In an important paper, Omohundro (2008) argued that goal preservation is a fundamental drive of any intelligent system, since a goal is more likely to be achieved if future versions of the agent strive towards the same goal. In this paper, we formalise this argument in general reinforcement learning, and explore situations where it fails. Our conclusion is that the self-modification possibility is harmless if and only if the value function of the agent anticipates the consequences of self-modifications and use the current utility function when evaluating the future.}
}

@inproceedings{Everitt2016vrl,
  author = {Everitt, Tom
and Hutter, Marcus},
  editor = {Steunebrink, Bas
and Wang, Pei
and Goertzel, Ben},
  title = {Avoiding Wireheading with Value Reinforcement Learning},
  booktitle = {Artificial General Intelligence: 9th International Conference, AGI 2016, New York, NY, USA, July 16-19, 2016, Proceedings},
  year = {2016},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {12--22},
  isbn = {978-3-319-41649-6},
  doi = {10.1007/978-3-319-41649-6_2},
  archiveprefix = {arXiv},
  eprint = {1605.03143},
  primaryclass = {cs.AI},
  note = {\href{http://nbviewer.jupyter.org/url/tomeveritt.se/source-code/AGI-16/cp-vrl.ipynb}{{S}ource code}},
  abstract = {How can we design good goals for arbitrarily intelligent agents? Reinforcement learning (RL) may seem like a natural approach. Unfortunately, RL does not work well for generally intelligent agents, as RL agents are incentivised to shortcut the reward sensor for maximum reward -- the so-called wireheading problem. In this paper we suggest an alternative to RL called value reinforcement learning (VRL). In VRL, agents use the reward signal to learn a utility function. The VRL setup allows us to remove the incentive to wirehead by placing a constraint on the agent’s actions. The constraint is defined in terms of the agent's belief distributions, and does not require an explicit specification of which actions constitute wireheading.}
}

@inproceedings{Martin2016,
  author = {Martin, Jarryd
and Everitt, Tom
and Hutter, Marcus},
  editor = {Steunebrink, Bas
and Wang, Pei
and Goertzel, Ben},
  title = {Death and Suicide in Universal Artificial Intelligence},
  booktitle = {Artificial General Intelligence: 9th International Conference, AGI 2016, New York, NY, USA, July 16-19, 2016, Proceedings},
  year = {2016},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {23--32},
  isbn = {978-3-319-41649-6},
  doi = {10.1007/978-3-319-41649-6_3},
  archiveprefix = {arXiv},
  eprint = {1606.00652},
  primaryclass = {cs.AI},
  abstract = {Reinforcement learning (RL) is a general paradigm for studying intelligent behaviour, with applications ranging from artificial intelligence to psychology and economics. AIXI is a universal solution to the RL problem; it can learn any computable environment. A technical subtlety of AIXI is that it is defined using a mixture over semimeasures that need not sum to 1, rather than over proper probability measures. In this work we argue that the shortfall of a semimeasure can naturally be interpreted as the agent's estimate of the probability of its death. We formally define death for generally intelligent agents like AIXI, and prove a number of related theorems about their behaviour. Notable discoveries include that agent behaviour can change radically under positive linear transformations of the reward signal (from suicidal to dogmatically self-preserving), and that the agent's posterior belief that it will survive increases over time.}
}

@inproceedings{Everitt2015sdt,
  author = {Everitt, Tom
and Leike, Jan
and Hutter, Marcus},
  editor = {Walsh, Toby},
  title = {Sequential Extensions of Causal and Evidential Decision Theory},
  booktitle = {Algorithmic Decision Theory: 4th International Conference, ADT 2015, Lexington, KY, USA, September 27-30, 2015, Proceedings},
  year = {2015},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {205--221},
  isbn = {978-3-319-23114-3},
  doi = {10.1007/978-3-319-23114-3_13},
  archiveprefix = {arXiv},
  eprint = {1506.07359},
  primaryclass = {cs.AI},
  note = {\href{http://nbviewer.jupyter.org/url/tomeveritt.se/source-code/ADT-15/sdt.ipynb}{{S}ource code}},
  abstract = {Moving beyond the dualistic view in AI where agent and environment are separated incurs new challenges for decision making, as calculation of expected utility is no longer straightforward. The non-dualistic decision theory literature is split between causal decision theory and evidential decision theory. We extend these decision algorithms to the sequential setting where the agent alternates between taking actions and observing their consequences. We find that evidential decision theory has two natural extensions while causal decision theory only has one.}
}

@inproceedings{Everitt2015search1,
  author = {Everitt, Tom
and Hutter, Marcus},
  editor = {Pfahringer, Bernhard
and Renz, Jochen},
  title = {Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art {I}: Tree Search},
  booktitle = {AI 2015: Advances in Artificial Intelligence: 28th Australasian Joint Conference, Canberra, ACT, Australia, November 30 -- December 4, 2015, Proceedings},
  year = 2015,
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {157--165},
  isbn = {978-3-319-26350-2},
  doi = {10.1007/978-3-319-26350-2_14},
  note = {\href{http://www.tomeveritt.se/source-code/AusAI-15/bfs-dfs-code.zip}{{S}ource code}},
  abstract = {Breadth-first search (BFS) and depth-first search (DFS) are the two most fundamental search algorithms. We derive approximations of their expected runtimes in complete trees, as a function of tree depth and probabilistic goal distribution. We also demonstrate that the analytical approximations are close to the empirical averages for most parameter settings, and that the results can be used to predict the best algorithm given the relevant problem features.}
}

@inproceedings{Everitt2015search2,
  author = {Everitt, Tom
and Hutter, Marcus},
  editor = {Pfahringer, Bernhard
and Renz, Jochen},
  title = {Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art {II}: Graph Search},
  booktitle = {AI 2015: Advances in Artificial Intelligence: 28th Australasian Joint Conference, Canberra, ACT, Australia, November 30 -- December 4, 2015, Proceedings},
  year = {2015},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {166--178},
  isbn = {978-3-319-26350-2},
  doi = {10.1007/978-3-319-26350-2_15},
  note = {\href{http://www.tomeveritt.se/source-code/AusAI-15/bfs-dfs-code.zip}{{S}ource code}},
  abstract = {The algorithm selection problem asks to select the best algorithm for a given problem. In the companion paper (Everitt and Hutter 2015b), expected runtime was approximated as a function of search depth and probabilistic goal distribution for tree search versions of breadth-first search (BFS) and depth-first search (DFS). Here we provide an analogous analysis of BFS and DFS graph search, deriving expected runtime as a function of graph structure and goal distribution. The applicability of the method is demonstrated through analysis of two different grammar problems. The approximations come surprisingly close to empirical reality.}
}

@techreport{Everitt2015searchreport,
  abstract = {Search is a central problem in artificial intelligence, and BFS and DFS the two most fundamental ways to search. In this report we derive results for average BFS and DFS runtime: For tree search, we employ a probabilistic model of goal distribution; for graph search, the analysis depends on an additional statistic of path redundancy and average branching factor. As an application, we use the results on two concrete grammar problems. The runtime estimates can be used to select the faster out of BFS and DFS for a given problem, and may form the basis for further analysis of more advanced search methods. Finally, we verify our results experimentally; the analytical approximations come surprisingly close to empirical reality.},
  archiveprefix = {arXiv},
  arxivid = {1509.02709},
  author = {Everitt, Tom and Hutter, Marcus},
  eprint = {1509.02709},
  institution = {Australian National University},
  keywords = {analytical algorithm selection,average runtime,bfs,dfs,graph search,heuristics,meta-,probabilistic goal distribution,tree search},
  pages = {1--36},
  title = {A Topological Approach to Meta-heuristics: Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem},
  year = {2015}
}

@inproceedings{Everitt2014nfl,
  author = {T. Everitt and T. Lattimore and M. Hutter},
  booktitle = {2014 IEEE Congress on Evolutionary Computation (CEC)},
  title = {Free Lunch for optimisation under the universal distribution},
  year = {2014},
  pages = {167-174},
  keywords = {optimisation;statistical distributions;computer science;function optimisation;no free lunch theorems;universal distribution;Complexity theory;Computers;Context;Information theory;Optimization;Search problems;Vectors},
  doi = {10.1109/CEC.2014.6900546},
  issn = {1089-778X},
  month = {July},
  archiveprefix = {arXiv},
  eprint = {1608.04544},
  primaryclass = {math.OC},
  abstract = {Function optimisation is a major challenge in computer science. The No Free Lunch theorems state that if all functions with the same histogram are assumed to be equally probable then no algorithm outperforms any other in expectation. We argue against the uniform assumption and suggest a universal prior exists for which there is a free lunch, but where no particular class of functions is favoured over another. We also prove upper and lower bounds on the size of the free lunch.}
}

@inproceedings{Alpcan2014,
  author = {T. Alpcan and T. Everitt and M. Hutter},
  booktitle = {Information Theory Workshop (ITW), 2014 IEEE},
  title = {Can we measure the difficulty of an optimization problem?},
  year = {2014},
  pages = {356-360},
  keywords = {information theory;optimisation;Shannon information theory;algorithmic information theory;black-box optimization;formal framework;Algorithm design and analysis;Complexity theory;Educational institutions;Information theory;Linear programming;Optimization;Runtime},
  doi = {10.1109/ITW.2014.6970853},
  issn = {1662-9019},
  month = {Nov},
  url = {http://www.tomeveritt.se/papers/ITW-14-diffopt.pdf},
  abstract = {Can we measure the difficulty of an optimization problem? Although optimization plays a crucial role in modern science and technology, a formal framework that puts problems and solution algorithms into a broader context has not been established. This paper presents a conceptual approach which gives a positive answer to the question for a broad class of optimization problems. Adopting an information and computational perspective, the proposed framework builds upon Shannon and algorithmic information theories. As a starting point, a concrete model and definition of optimization problems is provided. Then, a formal definition of optimization difficulty is introduced which builds upon algorithmic information theory. Following an initial analysis, lower and upper bounds on optimization difficulty are established. One of the upper-bounds is closely related to Shannon information theory and black-box optimization. Finally, various computational issues and future research directions are discussed.}
}

@phdthesis{Everitt2013a,
  author = {Everitt, Tom},
  number = {3},
  pages = {77},
  school = {Stockholm University},
  title = {Universal induction and optimisation: No free lunch?},
  type = {{MSc} Thesis},
  year = {2013},
  url = {http://www2.math.su.se/gemensamt/grund/exjobb/matte/2013/rep3/report.pdf},
  abstract = {Inductive reasoning is the process of making uncertain but justied inferences; often the goal is to infer a general theory from particular observations.  Despite being a central problem in both science and philosophy, a formal understanding of induction was long missing.  In 1964, substantial progress was made with Solomononoff's universal induction. Solomonoff formalized Occam's razor by means of algorithmic information theory, and used this to construct a universal Bayesian prior for sequence prediction.  The first part of this thesis gives a comprehensive overview of Solomonoff's theory of induction.  The optimisation problem of finding the argmax of an unknown function can be approached as an induction problem. However, optimisation differs in important respects from sequence prediction. We adapt universal induction to optimisation, and investigate its performance by putting it against the so-called No Free Lunch (NFL) theorems.  The NFL theorems show that under certain conditions, effective optimisation is impossible. We conclude that while universal induction avoids the classical NFL theorems, it does not work nearly as well in optimisation as in sequence prediction.}
}

@phdthesis{Everitt2010,
  author = {Everitt, Tom},
  school = {Stockholm University},
  title = {Automated Theorem Proving},
  type = {{BSc} Thesis},
  year = {2010},
  url = {http://www2.math.su.se/gemensamt/grund/exjobb/matte/2010/rep8/report.pdf},
  abstract = {The calculator was a great invention for the mathematician.  No longer was it necessary to spend the main part of the time doing tedious but trivial arithmetic computations.  A machine could do it both faster and more accurately.  A similar revolution might be just around the corner for proof
searching, the perhaps most time consuming part of the modern mathematician's work.  In this essay we present the Resolution procedure,  an algorithm that finds proofs for statements in propositional and  first-order logic.  This means that any true statement (expressible in either of these logics), in principle can be proven by a computer.  In fact, there are already practically usable implementations available; here we will illustrate the usage of one such program, Prover9, by some examples.  Just as many other theorem  provers,  Prover9  is  able  to  prove  many  non-trivial  true statements surprisingly fast.}
}

This file was generated by bibtex2html 1.99.