{"version":1,"pages":[{"id":"-LaNHyhHIZRkXgavzJMW","title":"介绍","pathname":"/deep-reinforcement-learning","siteSpaceId":"sitesp_UzNmo","description":"@gitbook"},{"id":"-LaNIJ-rFNo-xcqS0U7H","title":"神经网络","pathname":"/deep-reinforcement-learning/qian-yan/shen-jing-wang-luo-mo-xing","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"}]},{"id":"-LaNIJ-t6yrxWukmHBf3","title":"研究平台","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"}]},{"id":"-LaNIJ-uDYp3KHyTZDf2","title":"街机游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/jie-ji-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ-v1ewDAD9qxpYf","title":"竞速游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/jing-su-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ-wxPU0bFYINMEw","title":"第一人称射击游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/di-yi-ren-cheng-she-ji-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ-x8NXPhA21i2iF","title":"开放世界游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/kai-fang-shi-jie-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ-ybd8DEl8mkder","title":"即时战略游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/ji-shi-zhan-lve-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ-zvDv86SAyU4VY","title":"团队体育游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/tuan-dui-ti-yu-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ0-CmL-8F6p1I_4","title":"文字冒险游戏","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/wen-zi-mao-xian-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ00btt112L9CtNg","title":"OpenAI Gym & Universe","pathname":"/deep-reinforcement-learning/qian-yan/yan-jiu-ping-tai/openai-gym-and-universe","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"前言"},{"label":"研究平台"}]},{"id":"-LaNIJ0I9kgRpRkF04lg","title":"街机游戏","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LaNIJ0T3ofBEqi4TTYM","title":"DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/deep-q-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0Un2wvGsVq9Nq4","title":"DRQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/deep-recurrent-q-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0VjXwUUQGS0WH_","title":"Gorila","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/distributed-deep-q-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0W5JifWmE8ow76","title":"Double DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/double-deep-q-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0XlexftBd9h7Op","title":"Prioritized Experience Replay","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/prioritized-experience-replay","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0YPmqaIOClp2Kd","title":"Dueling DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/dueling-deep-q-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0ZIo0oqIjI8GJ0","title":"Bootstrapped DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/bootstrapped-deep-q-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0_pQm4G6aiOQwu","title":"Multiagent DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/multiagent-deep-q-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0dtAwRtEYUTXKc","title":"Progressive  Neural Networks","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/progressive-neural-networks","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0a5edqsxWMFEGs","title":"A3C","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/asynchronous-deep-reinforcement-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LbRtu2HuOwWtshhqIMR","title":"Retrace(λ)","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/retrace-l","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0c327gDYLw2Tyq","title":"ACER","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/actor-critic-with-experience-replay","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0eUXWYWFKyFlNe","title":"ACKTR","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/actor-critic-using-kronecker-factored-trust-region","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0fwdASGnE5qgoS","title":"TRPO","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/trust-region-policy-optimization","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0gq8EqhDM45ksU","title":"PPO","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/proximal-policy-optimization","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0ipcZ68O6eOt-A","title":"UNREAL","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/unreal","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0hxXM1uOYwam2J","title":"IMPALA","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/impala","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0jNZYmGP-1-9O6","title":"Distributional DQN","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/distributional-dqn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0kQdGCwdV6N5kK","title":"Noisy-Net","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/noisy-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0le9fz2yaQ3vwX","title":"Rainbow","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/rainbow","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0m7U8sF22Uf-Eo","title":"ES","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/es","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0n2PiXZDMvYLue","title":"NS-ES","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/ns-es","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0oBH_Khltt1JLr","title":"Deep GA","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/deep-ga","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0pGISqF9r-6iUf","title":"Playing Atari with Six Neurons","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/playing-atari-with-six-neurons","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0qvoP2_NKolC1H","title":"UCTtoClassification","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/ucttoclassification","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0rd6GlwJu2spQe","title":"Policy Distillation","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/policy-distillation","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0siK-B3aqjiBTi","title":"Actor-Mimic","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/actor-mimic","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0tv5xL1aNaopIM","title":"Action-Conditional Video Prediction","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/action-conditional-video-predictionusing-deep-networks-in-atari-games","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0ueZ5AUkcdAxWT","title":"Self-Supervision","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/loss-is-its-own-reward-self-supervision-for-reinforcement-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNldD1R1dV_o8eO2p1","title":"HRA","pathname":"/deep-reinforcement-learning/fang-fa/jie-ji-you-xi/hra","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"街机游戏"}]},{"id":"-LaNIJ0vDwlvYsloxAsE","title":"蒙特祖玛的复仇","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LaNvEi3ZfW4u_t9WyOH","title":"Hierarchical-DQN","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/hierarchical-dqn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-LaPoS4l3RJWJzBhviqL","title":"DQN-CTS","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/unifying-count-based-exploration-and-intrinsic-motivation","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-LaigAqCZ8Q9IL9KPGK1","title":"Pixel Recurrent Neural Networks","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/pixel-recurrent-neural-networks","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-LahEvi-wwNc7nxinFhj","title":"DQN-PixelCNN","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/count-based-exploration-with-neural-density-models","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-Larhd48gb9Zo3u9iAvH","title":"Ape-X","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/ape-x","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-Lars2Zl-JiLPhMKwAbp","title":"DQfD","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/dqfd","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-LasA4NFC0eqOtxQieT2","title":"Ape-X  DQfD","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/ape-x-dqfd","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-Lb0pH0Akcxqes1s9tyk","title":"Natural Language Guided Reinforcement Learning","pathname":"/deep-reinforcement-learning/fang-fa/ment-te-zu-ma-de-fu-chou/beating-atari-with-natural-language-guided-reinforcement-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"蒙特祖玛的复仇"}]},{"id":"-LaNIJ0w4nXhmSgpA8wR","title":"竞速游戏","pathname":"/deep-reinforcement-learning/fang-fa/jing-su-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-Lb1NDCXdemytI5pu-M4","title":"Direct Perception","pathname":"/deep-reinforcement-learning/fang-fa/jing-su-you-xi/deep-driving","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"竞速游戏"}]},{"id":"-LaNIJ0x0XOIW0PJpggT","title":"DDPG","pathname":"/deep-reinforcement-learning/fang-fa/jing-su-you-xi/ddpg","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"竞速游戏"}]},{"id":"-LfEC-p6AbfGls_Vsdm0","title":"TD3","pathname":"/deep-reinforcement-learning/fang-fa/jing-su-you-xi/td3","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"竞速游戏"}]},{"id":"-LaNIJ0y-3mtk9aFOO4c","title":"第一人称射击游戏","pathname":"/deep-reinforcement-learning/fang-fa/di-yi-ren-cheng-she-ji-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LbAy_0mJiqNJA2O3aZr","title":"SLAM-Augmented DQN","pathname":"/deep-reinforcement-learning/fang-fa/di-yi-ren-cheng-she-ji-you-xi/dqn-with-slam","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"第一人称射击游戏"}]},{"id":"-LbBCX7gk6ivraVFjGA5","title":"Direct Future Prediction","pathname":"/deep-reinforcement-learning/fang-fa/di-yi-ren-cheng-she-ji-you-xi/direct-future-prediction","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"第一人称射击游戏"}]},{"id":"-LeuPN_obgrW0Fgx3p2p","title":"For The Win","pathname":"/deep-reinforcement-learning/fang-fa/di-yi-ren-cheng-she-ji-you-xi/for-the-win","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"第一人称射击游戏"}]},{"id":"-LaNIJ0zHHdMp561L6pl","title":"开放世界游戏","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-shi-jie-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LbLYGWuynktb3wSLR6r","title":"H-DRLN","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-shi-jie-you-xi/h-drln","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放世界游戏"}]},{"id":"-LbQd0jl6gamiHQs-tDp","title":"Feedback Recurrent Memory Q-Network","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-shi-jie-you-xi/recurrent-memory-q-network","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放世界游戏"}]},{"id":"-LbQfsOcs-kAh5IIk88a","title":"Teacher-Student Curriculum Learning","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-shi-jie-you-xi/teacher-student-curriculum-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放世界游戏"}]},{"id":"-LaNIJ1-3Ba-yCAOrQJw","title":"即时战略游戏","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LbaGQ-WrJAuKdHKLrX5","title":"Puppet Search","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/puppet-search","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbROdSfY2z05mmA4mX8","title":"Combined Strategic and Tacticals","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/combining-strategic-learning-and-tactical-search-in-real-time-strategy-games","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbRR6uk7E2lu5IJ0TfT","title":"Zero Order","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/zero-order","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbRRNkfONo2__tSiva9","title":"IQL","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/iql","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbRQHWfnDXtMGo1yMt1","title":"COMA","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/counterfactual-multi-agent-policy-gradients","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbRRihWyTCv_5Oi283Y","title":"BiC-Net","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/bic-net","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-Lbq4dgt0TUZuIkFYVLI","title":"Macro-action SL","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/macro-action-sl","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LbRSlNp2e6fj7aX6SBg","title":"Macro-action PPO","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/tstarbots","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-Lc-y85y3PfzU7zsvOkQ","title":"On Reinforcement Learning for Full-length Game of StarCraft","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/on-reinforcement-learning-for-full-length-game-of-starcraft","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LcPHtFsU_Zn42ddLLv0","title":"AlphaStar","pathname":"/deep-reinforcement-learning/fang-fa/ji-shi-zhan-lve-you-xi/alphastar","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"即时战略游戏"}]},{"id":"-LaNIJ10UeOIEbBrzJJc","title":"团队体育游戏","pathname":"/deep-reinforcement-learning/fang-fa/tuan-dui-ti-yu-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LbRUaRgTR1lZZ0EL6Oy","title":"DDPG + Inverting Gradients","pathname":"/deep-reinforcement-learning/fang-fa/tuan-dui-ti-yu-you-xi/ddpg-+-inverting-gradients","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"团队体育游戏"}]},{"id":"-LbRUsTZqavkaySyuHpq","title":"DDPG + Mixing policy targets","pathname":"/deep-reinforcement-learning/fang-fa/tuan-dui-ti-yu-you-xi/ddpg-+-mixing-policy-targets","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"团队体育游戏"}]},{"id":"-LbRV32AzNB84ieBK8RP","title":"Object-centric prediction","pathname":"/deep-reinforcement-learning/fang-fa/tuan-dui-ti-yu-you-xi/object-centric-prediction","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"团队体育游戏"}]},{"id":"-LaNIJ11pZedxp9rGfLc","title":"文字冒险游戏","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LbRT_IVRLp3XVmevkz0","title":"LSTM-DQN","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi/lstm-dqn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"文字冒险游戏"}]},{"id":"-LbRTd08VVmzTiP5CQi8","title":"DRRN","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi/drrn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"文字冒险游戏"}]},{"id":"-LbRThjcEt-t1NfkRH6H","title":"Affordance Based Action Selection","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi/affordance-based-action-selection","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"文字冒险游戏"}]},{"id":"-LbRTmacJqMXEf7I0Xwp","title":"Golovin","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi/golovin","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"文字冒险游戏"}]},{"id":"-LbRTqLqC5VSyjhgaJUY","title":"AE-DQN","pathname":"/deep-reinforcement-learning/fang-fa/wen-zi-mao-xian-you-xi/ae-dqn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"文字冒险游戏"}]},{"id":"-Lg6OksQYz-CD97tk6Xu","title":"开放的挑战","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"}]},{"id":"-LaNIJ14EXy9lS-EF8Nb","title":"游戏通用性","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/you-xi-tong-yong-xing","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ15Kj7w0X-iZZyd","title":"稀疏、延迟、欺骗性的回报","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/xi-shu-3001-yan-chi-3001-qi-pian-xing-de-hui-bao","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ16_tVy100Yhkyg","title":"多智能体","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/duo-zhi-neng-ti","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ17QbDVN67enHLH","title":"终身适应","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/kuai-su-shi-ying","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ18IBbSsIALK0If","title":"像人类一样玩游戏","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/xiang-ren-lei-yi-yang-wan-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ195ZrP5roPu_6l","title":"可调节的性能等级","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/ke-diao-jie-de-xing-neng-deng-ji","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1AI9hVRl_ltpSm","title":"处理巨大的状态空间","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/chu-li-ju-da-de-zhuang-tai-kong-jian","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1BOO-jlK3G1-GP","title":"工业界应用","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/gong-ye-jie-ying-yong","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1CJBV4x9jh26_7","title":"游戏开发的交互式工具","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/jiao-hu-shi-you-xi-kai-fa","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1D4NlDjkbZT3WE","title":"创造新的游戏","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/chuang-zao-xin-de-you-xi","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1E58nUIIgd7EM9","title":"学习游戏的模型","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/xue-xi-you-xi-de-mo-xing","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-LaNIJ1FuqWD73-WNS-x","title":"计算资源","pathname":"/deep-reinforcement-learning/fang-fa/kai-fang-de-tiao-zhan/ji-suan-zi-yuan","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"方法"},{"label":"开放的挑战"}]},{"id":"-Lg6NP_nWO-mI1FO80ki","title":"Distributional RL","pathname":"/deep-reinforcement-learning/fu-lu/distributional-rl","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"}]},{"id":"-Lg6Na7ewtBmBOhOUYzw","title":"QR-DQN","pathname":"/deep-reinforcement-learning/fu-lu/distributional-rl/qr-dqn","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Distributional RL"}]},{"id":"-LflGkhJPxsT2RYYJxUG","title":"Policy Gradient","pathname":"/deep-reinforcement-learning/fu-lu/policy-gradient","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"}]},{"id":"-LaNIJ0b2Li--DZYqiQ_","title":"Off-Policy Actor-Critic","pathname":"/deep-reinforcement-learning/fu-lu/policy-gradient/off-policy-actor-critic","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Policy Gradient"}]},{"id":"-LfENevN9IrJEerS9mF8","title":"Generalized Advantage Estimation","pathname":"/deep-reinforcement-learning/fu-lu/policy-gradient/advantage-estimation","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Policy Gradient"}]},{"id":"-LfEC-68ii9XrZfv07YZ","title":"Soft Actor-Critic","pathname":"/deep-reinforcement-learning/fu-lu/policy-gradient/soft-actor-critic","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Policy Gradient"}]},{"id":"-LflOY8zc8W4X0Kgk_Gk","title":"PPO-Penalty","pathname":"/deep-reinforcement-learning/fu-lu/policy-gradient/ppo-penalty","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Policy Gradient"}]},{"id":"-LfH8739jyj2wLwiTVyc","title":"Model-Based RL","pathname":"/deep-reinforcement-learning/fu-lu/model-based-rl","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"}]},{"id":"-Lg6MmQvjdYHtb2SNXXc","title":"I2A","pathname":"/deep-reinforcement-learning/fu-lu/model-based-rl/i2a","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Model-Based RL"}]},{"id":"-LfHAcq1APQxrW7E1ICk","title":"MBMF","pathname":"/deep-reinforcement-learning/fu-lu/model-based-rl/mbmf","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Model-Based RL"}]},{"id":"-LfH8nJcMxn2KKiDbT_v","title":"MBVE","pathname":"/deep-reinforcement-learning/fu-lu/model-based-rl/mbve","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Model-Based RL"}]},{"id":"-LfH8SC89-hPQqaH0XLw","title":"World Models","pathname":"/deep-reinforcement-learning/fu-lu/model-based-rl/world-models","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Model-Based RL"}]},{"id":"-Lg6Mj2HX1IA4k3JJcV7","title":"Imitation Learning and Inverse Reinforcement Learning","pathname":"/deep-reinforcement-learning/fu-lu/imitation-learning-and-inverse-reinforcement-learning","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"}]},{"id":"-Lg6MrAEqnNCQjbuOzkr","title":"GAIL","pathname":"/deep-reinforcement-learning/fu-lu/imitation-learning-and-inverse-reinforcement-learning/gail","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Imitation Learning and Inverse Reinforcement Learning"}]},{"id":"-Lg6NlDuI8XwlEQcugsx","title":"Transfer and Multitask RL","pathname":"/deep-reinforcement-learning/fu-lu/transfer-and-multitask-rl","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"}]},{"id":"-Lg6NnE8qTI9Ww78tgv3","title":"HER","pathname":"/deep-reinforcement-learning/fu-lu/transfer-and-multitask-rl/her","siteSpaceId":"sitesp_UzNmo","description":"","breadcrumbs":[{"label":"附录"},{"label":"Transfer and Multitask RL"}]}]}