@misc{skryagin2024asn,
Anote = {./images/answer_set_networks.png},
title={Answer Set Networks: Casting Answer Set Programming into Deep Learning},
author={Arseny Skryagin and Daniel Ochs and Phillip Deibert and Simon Kohaut and Devendra Singh Dhami and Kristian Kersting},
Note={Although Answer Set Programming (ASP) allows constraining neural-symbolic (NeSy) systems, its employment is hindered by the prohibitive costs of computing stable models and the CPU-bound nature of state-of-the-art solvers. To this end, we propose Answer Set Networks (ASN), a NeSy solver. Based on Graph Neural Networks (GNN), ASNs are a scalable approach to ASP-based Deep Probabilistic Logic Programming (DPPL). Specifically, we show how to translate ASPs into ASNs and demonstrate how ASNs can efficiently solve the encoded problem by leveraging GPU's batching and parallelization capabilities. Our experimental evaluations demonstrate that ASNs outperform state-of-the-art CPU-bound NeSy systems on multiple tasks. Simultaneously, we make the following two contributions based on the strengths of ASNs. Namely, we are the first to show the finetuning of Large Language Models (LLM) with DPPLs, employing ASNs to guide the training with logic. Further, we show the "constitutional navigation" of drones, i.e., encoding public aviation laws in an ASN for routing Unmanned Aerial Vehicles in uncertain environments.},
Keywords={Answer Set Programming, Deep Learning, Neuro-Symbolic AI, Large Language Models},
Crossref={https://github.com/ml-research/answersetnetworks},
year={2024},
eprint={2412.14814},
Howpublished={arXiv preprint arXiv:2412.14814},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2412.14814},
}
@article{helff2024vlol,
Anote = {./images/helff2024vlol.png},
title = {V-LoL: A Diagnostic Dataset for Visual Logical Learning},
author={Lukas Helff and Wolfgang Stammer and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
journal = {Journal of Data-centric Machine Learning Research (DMLR)},
Note = {Despite the successes of recent developments in visual AI, different shortcomings still exist; from missing exact logical reasoning, to abstract generalization abilities, to understanding complex and noisy scenes. Unfortunately, existing benchmarks were not designed to capture more than a few of these aspects. Whereas deep learning datasets focus on visually complex data but simple visual reasoning tasks, inductive logic datasets involve complex logical learning tasks, however, lack the visual component. To address this, we propose the diagnostic visual logical learning dataset, V-LoL, that seamlessly combines visual and logical challenges. Notably, we introduce the first instantiation of V-LoL, V-LoL-Train -- a visual rendition of a classic benchmark in symbolic AI, the Michalski train problem. By incorporating intricate visual scenes and flexible logical reasoning tasks within a versatile framework, V-LoL-Train provides a platform for investigating a wide range of visual logical learning challenges. We evaluate a variety of AI systems including traditional symbolic AI, neural AI, as well as neuro-symbolic AI. Our evaluations demonstrate that even SOTA AI faces difficulties in dealing with visual logical learning challenges, highlighting unique advantages and limitations of each methodology. Overall, V-LoL opens up new avenues for understanding and enhancing current abilities in visual logical learning for AI systems.},
Keywords = {Neuro-symbolic AI, Deep Learning, Object-centric Learning, Benchmark, Michalski Train},
year={2024},
pages={},
crossref = {https://sites.google.com/view/v-lol},
url={https://openreview.net/pdf?id=IkbFIPiqFe}
}
@misc{divo2024forecastingCF,
anote = {./images/company-fundamentals-forecast.png},
title={Forecasting Company Fundamentals},
author={Felix Divo and Eric Endress and Kevin Endler and Kristian Kersting and Devendra Singh Dhami},
year={2024},
eprint={2411.05791},
archivePrefix={arXiv},
url={https://arxiv.org/abs/2411.05791},
Howpublished = {arXiv preprint arXiv:2411.05791},
keywords = {Forecasting, Company Fundamentals, Financial Machine Learning, Quantitative Finance, Value Investing, Factor Investing},
Note = {Company fundamentals are key to assessing companies' financial and overall success and stability. Forecasting them is important in multiple fields, including investing and econometrics. While statistical and contemporary machine learning methods have been applied to many time series tasks, there is a lack of comparison of these approaches on this particularly challenging data regime. To this end, we try to bridge this gap and thoroughly evaluate the theoretical properties and practical performance of 22 deterministic and probabilistic company fundamentals forecasting models on real company data. We observe that deep learning models provide superior forecasting performance to classical models, in particular when considering uncertainty estimation. To validate the findings, we compare them to human analyst expectations and find that their accuracy is comparable to the automatic forecasts. We further show how these high-quality forecasts can benefit automated stock allocation. We close by presenting possible ways of integrating domain experts to further improve performance and increase reliability.},
}
@misc{kraus2024unitedpretrain,
anote = {./images/xit-overview.png},
title={United We Pretrain, Divided We Fail! Representation Learning for Time Series by Pretraining on 75 Datasets at Once},
author={Maurice Kraus and Felix Divo and David Steinmann and Devendra Singh Dhami and Kristian Kersting},
year={2024},
eprint={2402.15404},
archivePrefix={arXiv},
Howpublished = {arXiv preprint arXiv:2402.15404},
url={https://arxiv.org/abs/2402.15404},
keywords = {machine learning, representation learning, pretraining, time series, multi-dataset training},
Note = {In natural language processing and vision, pretraining is utilized to learn effective representations. Unfortunately, the success of pretraining does not easily carry over to time series due to potential mismatch between sources and target. Actually, common belief is that multi-dataset pretraining does not work for time series! Au contraire, we introduce a new self-supervised contrastive pretraining approach to learn one encoding from many unlabeled and diverse time series datasets, so that the single learned representation can then be reused in several target domains for, say, classification. Specifically, we propose the XD-MixUp interpolation method and the Soft Interpolation Contextual Contrasting (SICC) loss. Empirically, this outperforms both supervised training and other self-supervised pretraining methods when finetuning on low-data regimes. This disproves the common belief: We can actually learn from multiple time series datasets, even from 75 at once.},
}
@article{strem2025multialarm,
Anote={./images/strem2025multialarm.png},
title = {Multimodal transformer for early alarm prediction},
journal = {Engineering Applications of Artificial Intelligence},
volume = {139},
pages = {109643},
year = {2025},
issn = {0952-1976},
doi = {https://doi.org/10.1016/j.engappai.2024.109643},
url = {https://www.sciencedirect.com/science/article/pii/S0952197624018013},
author = {Nika Strem and Devendra Singh Dhami and Benedikt Schmidt and Kristian Kersting},
keywords = {Multimodal transformer, Multimodal fusion, Industrial processes, Alarm management, Alarm prediction},
Note = {Alarms are an essential part of distributed control systems designed to help plant operators keep the processes stable and safe. In reality, however, alarms are often noisy and thus can be easily overlooked. Early alarm prediction can give the operator more time to assess the situation and introduce corrective actions to avoid downtime and negative impact on human safety and environment. Existing studies on alarm prediction typically rely on signals directly coupled with these alarms. However, using more sources of information could benefit early prediction by letting the model learn characteristic patterns in the interactions of signals and events. Meanwhile, multimodal deep learning has recently seen impressive developments. Combination (or fusion) of modalities has been shown to be a key success factor, yet choosing the best fusion method for a given task introduces a new degree of complexity, in addition to existing architectural choices and hyperparameter tuning. This is one of the reasons why real-world problems are still typically tackled with unimodal approaches. To bridge this gap, we introduce a multimodal Transformer model for early alarm prediction based on a combination of recent events and signal data. The model learns the optimal representation of data from multiple fusion strategies automatically. The model is validated on real-world industrial data. We show that our model is capable of predicting alarms with the given horizon and that the proposed multimodal fusion method yields state-of-the-art predictive performance while eliminating the need to choose among conventional fusion techniques, thus reducing tuning costs and training time.},
Crossref={}
}
@misc{steinmann2024navigatingshortcutsspuriouscorrelations,
anote = {./images/steinmann2024navigatingshortcuts.png},
title={Navigating Shortcuts, Spurious Correlations, and Confounders: From Origins via Detection to Mitigation},
author={David Steinmann and Felix Divo and Maurice Kraus and Antonia Wüst and Lukas Struppek and Felix Friedrich and Kristian Kersting},
year={2024},
eprint={2412.05152},
archivePrefix={arXiv},
Howpublished = {arXiv preprint arXiv:2412.05152},
primaryClass={cs.LG},
Keywords = {Shortcuts, Spurious Correlations, Clever Hans, Confounder, Detection, Mitigation},
url={https://arxiv.org/abs/2412.05152},
Note = {Shortcuts, also described as Clever Hans behavior, spurious correlations, or confounders, present a significant challenge in machine learning and AI, critically affecting model generalization and robustness. Research in this area, however, remains fragmented across various terminologies, hindering the progress of the field as a whole. Consequently, we introduce a unifying taxonomy of shortcut learning by providing a formal definition of shortcuts and bridging the diverse terms used in the literature. In doing so, we further establish important connections between shortcuts and related fields, including bias, causality, and security, where parallels exist but are rarely discussed. Our taxonomy organizes existing approaches for shortcut detection and mitigation, providing a comprehensive overview of the current state of the field and revealing underexplored areas and open challenges. Moreover, we compile and classify datasets tailored to study shortcut learning. Altogether, this work provides a holistic perspective to deepen understanding and drive the development of more effective strategies for addressing shortcuts in machine learning.}
}
@misc{kraus2024xlstmmixermultivariatetimeseries,
anote = {./images/xlstm-mixer.png},
title={xLSTM-Mixer: Multivariate Time Series Forecasting by Mixing via Scalar Memories},
author={Maurice Kraus and Felix Divo and Devendra Singh Dhami and Kristian Kersting},
year={2024},
eprint={2410.16928},
archivePrefix={arXiv},
primaryClass={cs.LG},
Howpublished = {arXiv preprint arXiv:2410.16928},
Keywords = {Time Series, Forecasting, xLSTM},
url={https://arxiv.org/abs/2410.16928},
Crossref={https://github.com/mauricekraus/xlstm-mixer},
Note = {Time series data is prevalent across numerous fields, necessitating the development of robust and accurate forecasting models. Capturing patterns both within and between temporal and multivariate components is crucial for reliable predictions. We introduce xLSTM-Mixer, a model designed to effectively integrate temporal sequences, joint time-variate information, and multiple perspectives for robust forecasting. Our approach begins with a linear forecast shared across variates, which is then refined by xLSTM blocks. These blocks serve as key elements for modeling the complex dynamics of challenging time series data. xLSTM-Mixer ultimately reconciles two distinct views to produce the final forecast. Our extensive evaluations demonstrate xLSTM-Mixer's superior long-term forecasting performance compared to recent state-of-the-art methods. A thorough model analysis provides further insights into its key components and confirms its robustness and effectiveness. This work contributes to the resurgence of recurrent models in time series forecasting.}
}
@misc{shindo2024blendrl_arxiv,
Anote={./images/shindo2024blendrl.png},
author = {Hikaru Shindo and Quentin Delfosse and Devendra Singh Dhami and Kristian Kersting},
title = {BlendRL: A Framework for Merging Symbolic and Neural Policy Learning},
Keywords = {Reinforcement Learning, Neuro-Symbolic AI, Differentiable Reasoning, Interpretable and Explainable AI},
Howpublished = {arXiv preprint arXiv:2410.11689},
year = {2024},
Url = {https://arxiv.org/abs/2410.11689},
Pages = {},
Crossref={https://github.com/ml-research/blendrl},
Note = {Humans can leverage both abstract reasoning and intuitive reactions. In contrast, reinforcement learning policies are typically encoded in either opaque systems like neural networks or symbolic systems that rely on predefined symbols and rules. This disjointed approach severely limits the agents’ capabilities, as they often lack either the flexible low-level reaction characteristic of neural agents or the interpretable reasoning of symbolic agents. To overcome this challenge, we introduce BlendRL, a neuro-symbolic RL framework that harmoniously integrates both paradigms within RL agents that use mixtures of both logic and neural policies. We empirically demonstrate that BlendRL agents outperform both neural and symbolic baselines in standard Atari environments, and showcase their robustness to environmental changes. Additionally, we analyze the interaction between neural and symbolic policies, illustrating how their hybrid use helps agents overcome each other's limitations.}
}
@inproceedings{hintersdorf2024balancingtransparency,
anote = {./images/hintersdorf2024balancingtransparency.png},
author = {Dominik Hintersdorf and Lukas Struppek and Kristian Kersting},
title = {Balancing Transparency and Risk: An Overview of the Security and Privacy Risks of Open-Source Machine Learning Models},
year = {2024},
url = {https://link.springer.com/chapter/10.1007/978-3-031-73741-1_16},
booktitle = {Bridging the Gap Between AI and Reality: First International Conference (AISoLA)},
pages = {269–283},
keywords = {Machine Learning, Security, Privacy, Open-Source},
note = {The field of artificial intelligence (AI) has experienced remarkable progress in recent years, driven by the widespread adoption of open-source machine learning models in both research and industry. Considering the resource-intensive nature of training on vast datasets, many applications opt for models that have already been trained. Hence, a small number of key players undertake the responsibility of training and publicly releasing large pre-trained models, providing a crucial foundation for a wide range of applications. However, the adoption of these open-source models carries inherent privacy and security risks that are often overlooked. To provide a concrete example, an inconspicuous model may conceal hidden functionalities that, when triggered by specific input patterns, can manipulate the behavior of the system, such as instructing self-driving cars to ignore the presence of other vehicles. The implications of successful privacy and security attacks encompass a broad spectrum, ranging from relatively minor damage like service interruptions to highly alarming scenarios, including physical harm or the exposure of sensitive user data. In this work, we present a comprehensive overview of common privacy and security threats associated with the use of open-source models. By raising awareness of these dangers, we strive to promote the responsible and secure use of AI systems.},
}
@misc{haerle2024scarsparseconditionedautoencoders,
anote={./images/haerle2024scar.png},
title={SCAR: Sparse Conditioned Autoencoders for Concept Detection and Steering in LLMs},
author={Ruben Härle and Felix Friedrich and Manuel Brack and Björn Deiseroth and Patrick Schramowski and Kristian Kersting},
year={2024},
Howpublished={arXiv preprint arXiv:2411.07122},
url={https://arxiv.org/pdf/2411.07122},
Keywords = {Large Language Models, Concept Steering, Sparse Autoencoder, AI Safety, SAEs, Mechanistic Interpretability},
Note = {Large Language Models (LLMs) have demonstrated remarkable capabilities in generating human-like text, but their output may not be aligned with the user or even produce harmful content.
This paper presents a novel approach to detect and steer concepts such as toxicity before generation. We introduce the Sparse Conditioned Autoencoder (SCAR), a single trained module that
extends the otherwise untouched LLM. SCAR ensures full steerability, towards and away from concepts (e.g., toxic content),
without compromising the quality of the model's text generation on standard evaluation benchmarks. We demonstrate the effective
application of our approach through a variety of concepts, including toxicity, safety, and writing style alignment. As such, this work establishes a robust framework for
controlling LLM generations, ensuring their ethical and safe deployment in real-world applications.}
}
@incollection{wuest2024bongard,
Anote={./images/wuest2024bongard.png},
title = {Bongard in Wonderland: Visual Puzzles That Still Make AI Go Mad?},
author = {Antonia Wüst and Tim Tobiasch and Lukas Helff and Devendra Singh Dhami and Constantin A. Rothkopf and Kristian Kersting},
booktitle = {Working Notes of the NeurIPS Workshop on System-2 Reasoning at Scale},
year = {2024},
Url = {https://openreview.net/pdf?id=4Yv9tFHDwX},
Keywords = {Cognitive Science, Benchmark, Bongard Problems, Vision-Language Models},
Note = {Recently, newly developed Vision-Language Models (VLMs), such as OpenAI’s GPT-4o, have
emerged, seemingly demonstrating advanced reasoning capabilities across text and image modalities.
Yet, the depth of these advances in language-guided perception and abstract reasoning remains
underexplored, and it is unclear whether these models can truly live up to their ambitious promises.
To assess the progress and identify shortcomings, we enter the wonderland of Bongard problems, a
set of classical visual reasoning puzzles that require human-like abilities of pattern recognition and
abstract reasoning. While VLMs occasionally succeed in identifying discriminative concepts and
solving some of the problems, they frequently falter, failing to understand and reason about visual
concepts. Surprisingly, even elementary concepts that may seem trivial to humans, such as simple
spirals, pose significant challenges. Moreover, even when asked to explicitly focus on and analyze
these concepts, they continue to falter, suggesting not only a lack of understanding of these elementary
visual concepts but also an inability to generalize to unseen concepts. These observations underscore
the current limitations of VLMs, emphasize that a significant gap remains between human-like visual
reasoning and machine cognition, and highlight the ongoing need for innovation in this area.}
}
@article{strem2025apt,
Anote={./images/strem2025apt.png},
title = {APT: Alarm Prediction Transformer},
author = {Nika Strem and Devendra Singh Dhami and Benedikt Schmidt and Benjamin Kloepper and Kristian Kersting},
Journal = {Expert Systems with Applications},
year = {2025},
Note = {Distributed control systems (DCS) are essential to operate complex industrial processes. A major part of a DCS is the alarm system, which helps plant operators to keep the processes stable and safe. Alarms are defined as threshold values on individual signals taking into account minimum reaction time of the human operator. In reality, however, alarms are often noisy and overwhelming, and thus can be easily overlooked by the operators. Early alarm prediction can give the operator more time to react and introduce corrective actions to avoid downtime and negative impact on human safety and the environment. In this context, we introduce Alarm Prediction Transformer (APT), a multimodal Transformer-based machine learning model for early alarm prediction based on the combination of recent events and signal data. Specifically, we propose two novel fusion strategies and three methods of label encoding with various levels of granularity. Given a window of several minutes of event logs and signal data, our model predicts whether an alarm is going to be triggered after a few minutes and, if yes, it also predicts its location. Our experiments on two novel real industrial plant data sets and a simulated data set show that the model is capable of predicting alarms with the given horizon and that our proposed fusion technique combining inputs from different modalities, i.e., events and signals, yields more accurate results than any of the modalities alone or conventional fusion techniques.},
Publisher = {Elsevier},
Keywords = {Machine Learning, Deep Learning, Industrial Processes, Alarm Management, Multimodal Transformer, Multimodal Fusion},
Url = {https://www.sciencedirect.com/science/article/pii/S0957417424023881},
Crossref={}
}
@inproceedings{shindo2024deisam,
Anote={./images/shindo2024deisam.png},
author = {Hikaru Shindo and Manuel Brack and Gopika Sudhakaran and Devendra Singh Dhami and Patrick Schramowski and Kristian Kersting},
title = {DeiSAM: Segment Anything with Deictic Prompting},
year = {2024},
Url = {https://arxiv.org/abs/2402.14123},
Pages = {},
booktitle = {Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS)},
Note = {Large-scale, pre-trained neural networks have demonstrated strong capabilities in various tasks, including zero-shot image segmentation. To identify concrete objects in complex scenes, humans instinctively rely on deictic descriptions in natural language, i.e., referring to something depending on the context, e.g., "The object that is on the desk and behind the cup." However, deep learning approaches cannot reliably interpret these deictic representations due to their lack of reasoning capabilities in complex scenarios. To remedy this issue, we propose DeiSAM, which integrates large pre-trained neural networks with differentiable logic reasoners. Given a complex, textual segmentation description, DeiSAM leverages Large Language Models (LLMs) to generate first-order logic rules and performs differentiable forward reasoning on generated scene graphs. Subsequently, DeiSAM segments objects by matching them to the logically inferred image regions. As part of our evaluation, we propose the Deictic Visual Genome (DeiVG) dataset, containing paired visual input and complex, deictic textual prompts. Our empirical results demonstrate that DeiSAM is a substantial improvement over data-driven neural baselines on deictic segmentation tasks.},
Keywords = {Neuro-Symbolic AI, Differentiable Reasoning, Segmentation, Textual Grounding}
}
@inproceedings{hintersdorf24nemo,
Anote={./images/hintersdorf2024nemo.png},
author = {Dominik Hintersdorf and Lukas Struppek and Kristian Kersting and Adam Dziedzic and Franziska Boenisch},
title = {Finding NeMo: Localizing Neurons Responsible For Memorization in Diffusion Models},
year = {2024},
Url = {https://arxiv.org/abs/2406.02366},
Pages = {},
booktitle = {Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS)},
Note = {Diffusion models (DMs) produce very detailed and high-quality images, achieved through rigorous training on huge datasets. Unfortunately, this practice raises privacy and intellectual property concerns, as DMs can memorize and later reproduce their potentially sensitive or copyrighted training images at inference time. Prior efforts to prevent this issue are viable when the DM is developed and deployed in a secure and constantly monitored environment. However, they hold the risk of adversaries circumventing the safeguards and are not effective when the DM itself is publicly released. To solve the problem, we introduce NeMo, the first method to localize memorization of individual data samples down to the level of neurons in DMs' cross-attention layers. Through our experiments, we make the intriguing finding that in many cases, single neurons are responsible for memorizing particular training samples. By deactivating these memorization neurons, we avoid replication of training data at inference time, increase the diversity in the generated outputs, and mitigate the leakage of sensitive data.},
Keywords = {Memorization, Diffusion Models, Stable Diffusion}
}
@inproceedings{stammer2024ncb,
anote={./images/stammer2024neural.png},
title={Neural Concept Binder},
author={Wolfgang Stammer and Antonia Wüst and David Steinmann and Kristian Kersting},
booktitle = {Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS)},
Pages={},
Crossref={https://github.com/ml-research/neuralconceptbinder},
Url={https://arxiv.org/pdf/2406.09949},
year={2024},
note={The challenge in object-based visual reasoning lies in generating descriptive yet distinct concept representations. Moreover, doing this in an unsupervised fashion requires human users to understand a model's learned concepts and potentially revise false concepts. In addressing this challenge, we introduce the Neural Concept Binder, a new framework for deriving discrete concept representations resulting in what we term "concept-slot encodings". These encodings leverage both "soft binding" via object-centric block-slot encodings and "hard binding" via retrieval-based inference. The Neural Concept Binder facilitates straightforward concept inspection and direct integration of external knowledge, such as human input or insights from other AI models like GPT-4. Additionally, we demonstrate that incorporating the hard binding mechanism does not compromise performance; instead, it enables seamless integration into both neural and symbolic modules for intricate reasoning tasks, as evidenced by evaluations on our newly introduced CLEVR-Sudoku dataset.},
Keywords={Concept Discovery, Interpretable Artificial Intelligence, Interactive Machine Learning, Disentanglement}
}
@inproceedings{delfosse2024interpretable,
Anote = {./images/delfosse2024interpretable.png},
title={Interpretable concept bottlenecks to align reinforcement learning agents},
author={Quentin Delfosse and Sebastian Sztwiertnia and Wolfgang Stammer and Mark Rothermel and Kristian Kersting},
booktitle = {Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS)},
year = {2024},
Url = {https://arxiv.org/pdf/2401.05821v2.pdf},
Pages = {},
Note = {Goal misalignment, reward sparsity and difficult credit assignment are only a few of the many issues that make it difficult for deep reinforcement learning (RL) agents to learn optimal policies. Unfortunately, the black-box nature of deep neural networks impedes the inclusion of domain experts for inspecting the model and revising suboptimal policies. To this end, we introduce *Successive Concept Bottleneck Agents* (SCoBots), that integrate consecutive concept bottleneck (CB) layers. In contrast to current CB models, SCoBots do not just represent concepts as properties of individual objects, but also as relations between objects which is crucial for many RL tasks. Our experimental results provide evidence of SCoBots' competitive performances, but also of their potential for domain experts to understand and regularize their behavior. Among other things, SCoBots enabled us to identify a previously unknown misalignment problem in the iconic video game, Pong, and resolve it. Overall, SCoBots thus result in more human-aligned RL agents.},
Keywords = {Reinforcement Learning, Transparent agents, Interpretability, Concept Bottlenecks}
}
@inproceedings{skryagin2024cna,
Anote={./images/skryagin2024cna.png},
author = {Arseny Skryagin and Felix Divo and Mohammad Amin Ali and Devendra Singh Dhami and Kristian Kersting},
title = {Graph Neural Networks Need Cluster-Normalize-Activate Modules},
year = {2024},
Url = {},
Pages = {},
booktitle = {Proceedings of the 38th Conference on Neural Information Processing Systems (NeurIPS)},
Note = {Graph Neural Networks (GNNs) are non-Euclidean deep learning models for graph-structured data. Despite their successful and diverse applications, oversmoothing prohibits deep architectures due to node features converging to a single fixed point. This severely limits their potential to solve complex tasks. To counteract this tendency, we propose a plug-and-play module consisting of three steps: Cluster→Normalize→Activate (CNA). By applying CNA modules, GNNs search and form super nodes in each layer, which are normalized and activated individually. We demonstrate in node classification and property prediction tasks that CNA significantly improves the accuracy over the state-of-the-art. Particularly, CNA reaches 94.18% and 95.75% accuracy on Cora and Citeseer, respectively. It further benefits GNNs in regression tasks as well, reducing the mean squared error compared to all baselines. At the same time, GNNs with CNA require substantially fewer learnable parameters than competing architectures.},
Crossref={https://github.com/ml-research/cna_modules},
Keywords = {Graph Neural Networks, Deep Geometric Learning, Learnable Activation Functions, Oversmoothing}
}
@inproceedings{deiseroth2024emnlp,
title={T-FREE: Subword Tokenizer-Free Generative LLMs via Sparse Representations for Memory-Efficient Embeddings},
author={Björn Deiseroth and Manuel Brack and Patrick Schramowski and Kristian Kersting and Samuel Weinbach},
year={2024},
booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
Keywords={Large Language Models, Tokenizers, Sparse Representations, Memory-Efficient Embeddings},
Note={Tokenizers are crucial for encoding information in Large Language Models, but their development has recently stagnated, and they contain inherent weaknesses. Major limitations include computational overhead, ineffective vocabulary use, and unnecessarily large embedding and head layers. Additionally, their performance is biased towards a reference corpus, leading to reduced effectiveness for underrepresented languages.
To remedy these issues, we propose T-FREE, which directly embeds words through sparse activation patterns over character triplets, and does not require a reference corpus. T-FREE inherently exploits morphological similarities and allows for strong compression of embedding layers. In our exhaustive experimental evaluation, we achieve competitive downstream performance with a parameter reduction of more than 85% on these layers. Further, T-FREE shows significant improvements in cross-lingual transfer learning.},
Anote={./images/deiseroth2024tfree.png},
url={../../papers/deiseroth2024emnlp.pdf}
}
@inproceedings{nakamura2024aurora,
author={Taishi Nakamura and Mayank Mishra and Simone Tedeschi and Yekun Chai and Jason T. Stillerman and Felix Friedrich and Prateek Yadav and Tanmay Laud and Vu Minh Chien and Terry Yue Zhuo and Diganta Misra and Ben Bogin and Xuan-Son Vu and Marzena Karpinska and Arnav Varma Dantuluri and Wojciech Kusa and Tommaso Furlanello and Rio Yokota and Niklas Muennighoff and Suhas Pai and Tosin Adewumi and Veronika Laippala and Xiaozhe Yao and Adalberto Junior and Alpay Ariyak and Aleksandr Drozd and Jordan Clive and Kshitij Gupta and Liangyu Chen and Qi Sun and Ken Tsui and Noah Persaud and Nour Fahmy and Tianlong Chen and Mohit Bansal and Nicolo Monti and Tai Dang and Ziyang Luo and Tien-Tung Bui and Roberto Navigli and Virendra Mehta and Matthew Blumberg and Victor May and Huu Nguyen and Sampo Pyysalo},
title = {Aurora-M: Open Source Continual Pre-training for Multilingual Language and Code},
year = {2024},
booktitle = {The 31st International Conference on Computational Linguistics (COLING)},
keywords = {Multilingual, Continual, Pre-training, Safety, Fairness, Dataset, Red Teaming, Alignment, Regulations, Policy},
Note = {Pretrained language models underpin several AI applications, but their high computational cost for training limits accessibility. Initiatives such as BLOOM and StarCoder aim to democratize access to pretrained models for collaborative community development. However, such existing models face challenges: limited multilingual capabilities, continual pretraining causing catastrophic forgetting, whereas pretraining from scratch is computationally expensive, and compliance with AI safety and development laws. This paper presents Aurora-M, a 15B parameter multilingual open-source model trained on English, Finnish, Hindi, Japanese, Vietnamese, and code. Continually pretrained from StarCoderPlus on 435 billion additional tokens, Aurora-M surpasses 2 trillion tokens in total training token count. It is the first open-source multilingual model fine-tuned on human-reviewed safety instructions, thus aligning its development not only with conventional red-teaming considerations, but also with the specific concerns articulated in the Biden-Harris Executive Order on the Safe, Secure, and Trustworthy Development and Use of Artificial Intelligence. Aurora-M is rigorously evaluated across various tasks and languages, demonstrating robustness against catastrophic forgetting and outperforming alternatives in multilingual settings, particularly in safety evaluations. To promote responsible open-source LLM development, Aurora-M and its variants are released publicly.},
Anote = {./images/aurora.png},
url = {https://arxiv.org/pdf/2404.00399}
}
@misc{tedeschi2024redteam,
author = {Simone Tedeschi and Felix Friedrich and Dung Nguyen and Nam Pham and Tanmay Laud and Chien Vu and Terry Yue Zhuo and Ziyang Luo and Ben Bogin and Tien-Tung Bui and Xuan-Son Vu and Paulo Villegas and Victor May and Huu Nguyen},
title = {Biden-Harris Redteam Dataset: A red-teaming dataset focusing on concerns in the Biden-Harris AI Executive Order},
year = 2024,
keywords = {Safety, Fairness, Dataset, Red Teaming, Alignment, Regulations, Policy},
Anote = {./images/aurora.png},
Howpublished = {Available at Hugging Face: https://huggingface.co/datasets/aurora-m/biden-harris-redteam},
url = {https://huggingface.co/datasets/aurora-m/biden-harris-redteam}
}
@inproceedings{busch2024net,
title={Phi-net: Efficient Causal Modeling at Scale},
author={Florian Peter Busch and Moritz Willig and Jonas Seng and Kristian Kersting and Devendra Singh Dhami},
booktitle={Proceedings of the International Conference on Probabilistic Graphical Models (PGM)},
pages={452--469},
year={2024},
publisher={PMLR},
Keywords={Causal ML, Probabilistic Circuits, Neural Causal Models, Large-Scale Inference},
Anote={./images/busch2024net.png},
url={https://www.socsci.ru.nl/johank/pgm2024/busch24.pdf},
note={Being a ubiquitous aspect of human cognition, causality has made its way into modern-day machine-learning research. Despite its importance in real-world applications, contemporary research still struggles with high-dimensional causal problems. Leveraging the efficiency of probabilistic circuits, which offer tractable computation of marginal probabilities, we introduce Phi-net, a probabilistic model designed for large-scale causal inference. Phi-net is a type of sum-product network where layering and the einsum operation allow for efficient parallelization. By incorporating interventional data into the learning process, the model can learn the effects of interventions and make predictions based on the specific interventional setting. Overall, Phi-net is a causal probabilistic circuit that efficiently answers causal queries in large-scale problems. We present evaluations conducted on both synthetic data and a substantial real-world dataset.}
}
@misc{brack2024communityoscar,
title={Community OSCAR: A Community Effort for Multilingual Web Data},
author={Manuel Brack and Malte Ostendorff and Pedro Ortiz Suarez and José Javier Saiz and Iñaki Lacunza Castilla and Jorge Palomar-Giner and Patrick Schramowski and Georg Rehm and Marta Villegas and Kristian Kersting},
year={2024},
Howpublished={Technical Report / Preprint},
Keywords={Large-scale Data, Dataset, LLM training, LLM, Multilingual},
Note={The development of large language models (LLMs) relies heavily on extensive, high-quality datasets. Publicly available datasets focus predominantly on English, leaving other language communities behind. To address this issue, we introduce Community OSCAR, a multilingual dataset initiative designed to address the gap between English and non-English data availability. Through a collective effort, Community OSCAR covers over 150 languages with 45 billion documents, totaling over 345 TiB of data. Initial results indicate that Community OSCAR provides valuable raw data for training LLMs and enhancing the performance of multilingual models. This work aims to contribute to the ongoing advancements in multilingual NLP and to support a more inclusive AI ecosystem by making high-quality, multilingual data more accessible to those working with low-resource languages.},
Anote={./images/brack2024communityoscar.png},
url={https://occiglot.eu/papers/Community_Oscar.pdf}
}
@article{shindo2024neumann,
Anote={./images/shindo2023neumann.png},
title = {Learning Differentiable Logic Programs for Abstract Visual Reasoning},
author = {Hikaru Shindo and Viktor Pfanschilling and Devendra Singh Dhami and Kristian Kersting},
Journal = {Machine Learning Journal (MLJ)},
year = {2024},
Note = {Visual reasoning is essential for building intelligent agents that understand the world and perform problem-solving beyond perception. Differentiable forward reasoning has been developed to integrate reasoning with gradient-based machine learning paradigms. However, due to the memory intensity, most existing approaches do not bring the best of the expressivity of first-order logic, excluding a crucial ability to solve abstract visual reasoning, where agents need to perform reasoning by using analogies on abstract concepts in different scenarios. To overcome this problem, we propose NEUro-symbolic Message-pAssiNg reasoNer (NEUMANN), which is a graph-based differentiable forward reasoner, passing messages in a memory-efficient manner and handling structured programs with functors. Moreover, we propose a computationally-efficient structure learning algorithm to perform explanatory program induction on complex visual scenes. To evaluate, in addition to conventional visual reasoning tasks, we propose a new task, visual reasoning behind-the-scenes, where agents need to learn abstract programs and then answer queries by imagining scenes that are not observed. We empirically demonstrate that NEUMANN solves visual reasoning tasks efficiently, outperforming neural, symbolic, and neuro-symbolic baselines.},
Publisher = {Springer},
Keywords = {Differentiable Reasoning, Inductive Logic Programming, Neuro-Symbolic AI, Object-centric Learning, Graph Neural Network},
Url = {https://arxiv.org/pdf/2307.00928},
Crossref={https://sites.google.com/view/neumann-tuda}
}
@misc{brack2024unleashing,
title={Unleashing Creativity: Generalizing Semantic Control for Text-to-Image Diffusion Models},
author={Manuel Brack and Marlon May and Linoy Tsaban and Felix Friedrich and Patrick Schramowski and Apolinaros Passos and Kristian Kersting },
year={2024},
Howpublished={Technical Report / Preprint},
Keywords={Text-to-Image Synthesis, Text-Guided Image Generation, SEGA, Semantic Control, Diffusion Transformers},
Note={The recent surge in popularity of text-to-image diffusion models (DMs) can largely be attributed to the versatile, expressive, and intuitive user interfaces provided through textual prompts. These models enable inexperienced people to explore artistic ventures easily and provide exciting new opportunities to experienced artists. However, the semantic control offered through text prompts alone is limited and rather fragile, and overall lacks the fine granularity necessary for creative applications. The majority of methods addressing this issue are restricted to specific DM architectures, severely limiting the creative workflow instead of generalizing it to arbitrary models. In contrast, we demonstrate that semantic guidance (SEGA) generalizes to any DM architecture. Importantly, SEGA is natively compatible with state-of-the-art diffusion transformers. Our empirical results show strong model-agnostic performance, and we highlight new creative possibilities enabled by SEGA, such as enhanced typographic manipulations. This work underscores SEGA’s potential to provide consistent, high-quality semantic guidance in a rapidly evolving generative model landscape.},
Anote={./images/brack2024unleashing.png},
url={https://www.aiml.informatik.tu-darmstadt.de/papers/brack2024unleashing.pdf},
}
@misc{deiseroth2024tfree,
title={T-FREE: Tokenizer-Free Generative LLMs via Sparse Representations for Memory-Efficient Embeddings},
author={Björn Deiseroth and Manuel Brack and Patrick Schramowski and Kristian Kersting and Samuel Weinbach},
year={2024},
Howpublished={arXiv preprint arXiv:2406.19223},
Keywords={Large Language Models, Tokenizers, Sparse Representations, Memory-Efficient Embeddings},
Note={Tokenizers are crucial for encoding information in Large Language Models, but their development has recently stagnated, and they contain inherent weaknesses. Major limitations include computational overhead, ineffective vocabulary use, and unnecessarily large embedding and head layers. Additionally, their performance is biased towards a reference corpus, leading to reduced effectiveness for underrepresented languages.
To remedy these issues, we propose T-FREE, which directly embeds words through sparse activation patterns over character triplets, and does not require a reference corpus. T-FREE inherently exploits morphological similarities and allows for strong compression of embedding layers. In our exhaustive experimental evaluation, we achieve competitive downstream performance with a parameter reduction of more than 85% on these layers. Further, T-FREE shows significant improvements in cross-lingual transfer learning.},
Anote={./images/deiseroth2024tfree.png},
url={https://arxiv.org/abs/2406.19223},
}
@article{friedrich2024fair,
Anote = {./images/ffriedrich_fair_2023.png},
title={Auditing and Instructing Text-to-Image Generation Models on Fairness},
author={Felix Friedrich and Manuel Brack and Dominik Hintersdorf and Lukas Struppek and Patrick Schramowski and Sasha Luccioni and Kristian Kersting},
Journal = {AI and Ethics},
year = {2024},
Note = {Generative AI models have recently achieved astonishing results in quality and are consequently employed in a fast-growing number of applications. However, since they are highly data-driven, relying on billion-sized datasets randomly scraped from the internet, they also suffer from degenerated and biased human behavior, as we demonstrate. In fact, they may even reinforce such biases. To not only uncover but also combat these undesired effects, we present a novel strategy, called Fair Diffusion, to attenuate biases after the deployment of generative text-to-image models. Specifically, we demonstrate shifting a bias, based on human instructions, in any direction yielding arbitrarily new proportions for, e.g., identity groups. As our empirical evaluation demonstrates, this introduced control enables instructing generative image models on fairness, with no data filtering and additional training required.},
Publisher = {Springer},
Keywords = {Fairness, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, AI Ethics},
Url={https://link.springer.com/content/pdf/10.1007/s43681-024-00531-5.pdf},
doi={https://doi.org/10.1007/s43681-024-00531-5}
}
@incollection{struppeknemofmw,
Anote={./images/hintersdorf2024nemo.png},
author = {Lukas Struppek and Dominik Hintersdorf and Kristian Kersting and Adam Dziedzic and Franziska Boenisch},
title = {Finding NeMo: Localizing Neurons Responsible For Memorization in Diffusion Models},
year = {2024},
Url = {https://openreview.net/pdf?id=5wOrSneuwe},
Pages = {},
booktitle={Working Notes of the ICML 2024 Workshop on Foundation Models in the Wild},
Note = {Diffusion models (DMs) produce very detailed and high-quality images, achieved through rigorous training on huge datasets. Unfortunately, this practice raises privacy and intellectual property concerns, as DMs can memorize and later reproduce their potentially sensitive or copyrighted training images at inference time. Prior efforts to prevent this issue are viable when the DM is developed and deployed in a secure and constantly monitored environment. However, they hold the risk of adversaries circumventing the safeguards and are not effective when the DM itself is publicly released. To solve the problem, we introduce NeMo, the first method to localize memorization of individual data samples down to the level of neurons in DMs' cross-attention layers. Through our experiments, we make the intriguing finding that in many cases, single neurons are responsible for memorizing particular training samples. By deactivating these memorization neurons, we avoid replication of training data at inference time, increase the diversity in the generated outputs, and mitigate the leakage of sensitive data.},
Keywords = {Memorization, Diffusion Models, Stable Diffusion}
}
@misc{solaiman2024evaluatingsocialimpactgenerative,
Anote={./images/defending_with_backdoors.png},
title={Evaluating the Social Impact of Generative AI Systems in Systems and Society},
author={Irene Solaiman and Zeerak Talat and William Agnew and Lama Ahmad and Dylan Baker and Su Lin Blodgett and Canyu Chen and Hal Daumé III and Jesse Dodge and Isabella Duan and Ellie Evans and Felix Friedrich and Avijit Ghosh and Usman Gohar and Sara Hooker and Yacine Jernite and Ria Kalluri and Alberto Lusoli and Alina Leidinger and Michelle Lin and Xiuzhu Lin and Sasha Luccioni and Jennifer Mickel and Margaret Mitchell and Jessica Newman and Anaelia Ovalle and Marie-Therese Png and Shubham Singh and Andrew Strait and Lukas Struppek and Arjun Subramonian},
year={2024},
Howpublished={arXiv preprint arXiv:2306.05949 and to appear in Hacker, Engel, Hammer, Mittelstadt (eds), Oxford Handbook on the Foundations and Regulation of Generative AI. Oxford University Press.},
url={https://arxiv.org/abs/2306.05949},
Note = {Generative AI systems across modalities, ranging from text (including code), image, audio, and video, have broad social impacts, but there is no official standard for means of evaluating those impacts or for which impacts should be evaluated. In this paper, we present a guide that moves toward a standard approach in evaluating a base generative AI system for any modality in two overarching categories: what can be evaluated in a base system independent of context and what can be evaluated in a societal context. Importantly, this refers to base systems that have no predetermined application or deployment context, including a model itself, as well as system components, such as training data. Our framework for a base system defines seven categories of social impact: bias, stereotypes, and representational harms; cultural values and sensitive content; disparate performance; privacy and data protection; financial costs; environmental costs; and data and content moderation labor costs. Suggested methods for evaluation apply to listed generative modalities and analyses of the limitations of existing evaluations serve as a starting point for necessary investment in future evaluations. We offer five overarching categories for what can be evaluated in a broader societal context, each with its own subcategories: trustworthiness and autonomy; inequality, marginalization, and violence; concentration of authority; labor and creativity; and ecosystem and environment. Each subcategory includes recommendations for mitigating harm.},
Keywords = {Generative AI, Social Impact, Ethical AI, Fairness, Accountability, Transparency}
}
@incollection{delfosse2024ocalm,
Anote={./images/delfosse2024ocalm.png},
title={OCALM: Object-Centric Assessment with Language Models},
author={Timo Kaufmann and Jannis Blüml and Antonia Wüst and Quentin Delfosse and Kristian Kersting and Eyke Hüllermeier},
year={2024},
booktitle={Working Notes of the RLC 2024 Workshop on Reinforcement Learning Beyond Rewards},
url={https://rlbrew-workshop.github.io/papers/40_ocalm_object_centric_assessmen.pdf},
Note = {Properly defining a reward signal to efficiently train a reinforcement learning (RL) agent is a challenging task. Designing balanced objective functions from which a desired behavior can emerge requires expert knowledge, especially for complex environments. Learning rewards from human feedback or using large language models (LLMs) to directly provide rewards are promising alternatives, allowing non-experts to specify goals for the agent. However, black-box reward models make it difficult to debug the reward. In this work, we propose Object-Centric Assessment with Language Models (OCALM) to derive inherently interpretable reward functions for RL agents from natural language task descriptions. OCALM uses the extensive world-knowledge of LLMs while leveraging the object-centric nature common to many environments to derive reward functions focused on relational concepts, providing RL agents with the ability to derive policies from task descriptions.},
Keywords = {Deep Reinforcement Learning, LLM, Atari, Arcade Games, Reward Modification}
}
@incollection{delfosse2024hackatari,
Anote={./images/delfosse2024hackatari.png},
title={HackAtari: Atari Learning Environments for Robust and Continual Reinforcement Learning},
author={Quentin Delfosse and Jannis Blüml and Bjarne Gregori and Kristian Kersting},
year={2024},
booktitle={Working Notes of the RLC 2024 Workshop on Interpretable Policies in Reinforcement Learning},
url={https://openreview.net/pdf?id=Th5OOmiHVo},
Crossref = {https://github.com/k4ntz/HackAtari},
Note = {Artificial agents' adaptability to novelty and alignment with intended behavior is crucial for their effective deployment. Reinforcement learning (RL) leverages novelty as a means of exploration, yet agents often struggle to handle novel situations, hindering generalization. To address these issues, we propose HackAtari, a framework introducing controlled novelty to the most common RL benchmark, the Atari Learning Environment. HackAtari allows us to create novel game scenarios (including simplification for curriculum learning), to swap the game elements' colors, as well as to introduce different reward signals for the agent. We demonstrate that current agents trained on the original environments include robustness failures, and evaluate HackAtari's efficacy in enhancing RL agents' robustness and aligning behavior through experiments using C51 and PPO. Overall, HackAtari can be used to improve the robustness of current and future RL algorithms, allowing Neuro-Symbolic RL, curriculum RL, causal RL, as well as LLM-driven RL. Our work underscores the significance of developing interpretable RL agents.},
Keywords = {Deep Reinforcement Learning, Object-centric Deep Learning, Atari, Arcade Games, Novelty, Continual Learning, Robustness}
}
@inproceedings{hintersdorf24defending,
Anote={./images/defending_with_backdoors.png},
title={Defending Our Privacy With Backdoors},
author={Dominik Hintersdorf and Lukas Struppek and Daniel Neider and Kristian Kersting},
year={2024},
booktitle = {Proceedings of the 27th European Conference on Artificial Intelligence (ECAI)},
url={https://arxiv.org/pdf/2310.08320.pdf},
Note = {The proliferation of large AI models trained on uncurated, often sensitive web-scraped data has raised significant privacy concerns. One of the concerns is that adversaries can extract information about the training data using privacy attacks. Unfortunately, the task of removing specific information from the models without sacrificing performance is not straightforward and has proven to be challenging. We propose a rather easy yet effective defense based on backdoor attacks to remove private information such as names and faces of individuals from vision-language models by fine-tuning them for only a few minutes instead of re-training them from scratch. Specifically, through strategic insertion of backdoors into text encoders, we align the embeddings of sensitive phrases with those of neutral terms - "a person" instead of the person's actual name. For image encoders, we map embeddings of individuals to be removed from the model to a universal, anonymous embedding. Our empirical results demonstrate the effectiveness of our backdoor-based defense on CLIP by assessing its performance using a specialized privacy attack for zero-shot classifiers. Our approach provides not only a new "dual-use" perspective on backdoor attacks, but also presents a promising avenue to enhance the privacy of individuals within models trained on uncurated web-scraped data.},
Keywords = {Security, Privacy, Backdoor Attacks, CLIP, Identity Inference Attacks}
}
@inproceedings{czech24representation,
Anote={./images/czech24representation.png},
title={Representation Matters for Mastering Chess: Improved Feature Representation in AlphaZero Outperforms Switching to Transformers},
author={Johannes Czech and Jannis Blüml and Kristian Kersting and Hedinn Steingrimsson},
year={2024},
booktitle = {Proceedings of the 27th European Conference on Artificial Intelligence (ECAI)},
url={https://www.aiml.informatik.tu-darmstadt.de/papers/czech24representation.pdf},
Note = {While transformers have gained recognition as a versatile tool for artificial intelligence (AI), an unexplored challenge arises in the context of chess - a classical AI benchmark. Here, incorporating Vision Transformers (ViTs) into AlphaZero is insufficient for chess mastery, mainly due to ViTs' computational limitations. The attempt to optimize their efficiency by combining MobileNet and NextViT could not outperform AlphaZero. Instead, we propose a practical improvement that involves a simple change in the input representation and value loss functions. As a result, we achieve a significant performance boost of up to 180 Elo points beyond what is currently achievable with AlphaZero in chess and chess variants. In addition to these improvements, our experimental results using the Integrated Gradient technique confirm the effectiveness of the newly introduced features.},
Keywords = {Chess, Decision Transformer, MCTS, Input Representation}
}
@incollection{seng2024ibohpc,
Anote = {./images/seng2024ibohpc.png},
title={Hyperparameter Optimization via Interacting with Probabilistic Circuits},
author={Jonas Seng and Fabrizio Ventola and Zhongjie Yu and Kristian Kersting},
year={2024},
Url = {https://openreview.net/pdf?id=k1xrK8l3d2},
booktitle = {Working Notes of the Workshop Track of the International Conference on Automated Machine Learning (AutoML)},
Note = {Despite the growing interest in designing truly interactive hyperparameter optimization (HPO) methods, to date, only a few allow the inclusion of feedback from experts. However, these methods add friction to the interactive process, rigidly requiring the expert input to be fully specified as a prior distribution ex ante and often imposing additional constraints on the optimization framework. This hinders the flexible incorporation of expertise and valuable knowledge of domain experts, who might provide partial feedback at any time during optimization. To overcome these limitations, we introduce a novel Bayesian optimization approach leveraging tractable probabilistic models named probabilistic circuits (PCs) as the surrogate model. PCs encode a tractable joint distribution over the hybrid hyperparameter space and enable exact conditional inference and sampling, allowing users to provide valuable insights interactively and generate configurations adhering to their feedback. We demonstrate the benefits of the resulting interactive HPO through an extensive empirical evaluation on diverse benchmarks, including the challenging setting of neural architecture search.},
Keywords = {Automated ML, Interactive Optimization, Neural Architecture Search, Hyperparameter Optimization, Probabilistic Circuits}
}
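As a loose illustration of the interactive-HPO idea above (not the paper's method; the probabilistic-circuit surrogate is replaced here by plain random sampling), the sketch below filters candidate configurations by a user-supplied predicate before evaluation. All function names, the toy search space, and the toy objective are invented for this example.

import random


def sample_config():
    # Toy hybrid search space: a continuous learning rate and a discrete depth.
    return {"lr": 10 ** random.uniform(-5, -1), "layers": random.randint(1, 8)}


def user_feedback(cfg):
    # Stand-in for interactive expert feedback ("keep the learning rate small").
    return cfg["lr"] < 1e-2


def evaluate(cfg):
    # Stand-in for the expensive training run a real HPO loop would perform.
    return -abs(cfg["lr"] - 1e-3) - 0.01 * cfg["layers"]


candidates = [sample_config() for _ in range(100)]
candidates = [cfg for cfg in candidates if user_feedback(cfg)]
best = max(candidates, key=evaluate)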
@inproceedings{poli2024mad,
Anote={./images/poli2024mad.png},
title={Mechanistic Design and Scaling of Hybrid Architectures},
author={Michael Poli and Armin W. Thomas and Eric Nguyen and Stefano Massaroli and Pragaash Ponnusamy and Björn Deiseroth and Kristian Kersting and Taiji Suzuki and Brian Hie and Stefano Ermon and Christopher Re and Ce Zhang},
year={2024},
booktitle={Proceedings of the 41st International Conference on Machine Learning (ICML)},
url={https://arxiv.org/pdf/2403.17844},
Note = {The development of deep learning architectures is a resource-demanding process, due to a vast design space, long prototyping times, and the high compute costs associated with at-scale model training and evaluation. We set out to simplify this process by grounding it in an end-to-end mechanistic architecture design (MAD) pipeline, encompassing small-scale capability unit tests predictive of scaling laws. Through a suite of synthetic token manipulation tasks such as compression and recall, designed to probe capabilities, we identify and test new hybrid architectures constructed from a variety of computational primitives. We experimentally validate the resulting architectures via an extensive compute-optimal and a new state-optimal scaling law analysis, training over 500 language models between 70M and 7B parameters. Surprisingly, we find MAD synthetics to correlate with compute-optimal perplexity, enabling accurate evaluation of new architectures via isolated proxy tasks. The new architectures found via MAD, based on simple ideas such as hybridization and sparsity, outperform state-of-the-art Transformer, convolutional, and recurrent architectures (Transformer++, Hyena, Mamba) in scaling, both at compute-optimal budgets and in overtrained regimes. Overall, these results provide evidence that performance on curated synthetic tasks can be predictive of scaling laws, and that an optimal architecture should leverage specialized layers via a hybrid topology.},
Keywords = {Mechanistic Architecture Design, Hybrid Architectures, Transformer, Convolutional Architectures, Recurrent Architectures}
}
@inproceedings{steinmann2024intervene,
Anote={./images/steinemann2024intervene.png},
title={Learning to Intervene on Concept Bottlenecks},
author={David Steinmann and Wolfgang Stammer and Felix Friedrich and Kristian Kersting},
year={2024},
booktitle={Proceedings of the 41st International Conference on Machine Learning (ICML)},
url={https://proceedings.mlr.press/v235/steinmann24a.html},
Note = {While traditional deep learning models often lack interpretability, concept bottleneck models (CBMs) provide inherent explanations via their concept representations. Specifically, they allow users to perform interventional interactions on these concepts by updating the concept values and thus correcting the predictive output of the model. Traditionally, however, these interventions are applied to the model only once and discarded afterward. To rectify this, we present concept bottleneck memory models (CB2M), an extension to CBMs. Specifically, a CB2M learns to generalize interventions to appropriate novel situations via a two-fold memory with which it can learn to detect mistakes and to reapply previous interventions. In this way, a CB2M learns to automatically improve model performance from a few initially obtained interventions. If no prior human interventions are available, a CB2M can detect potential mistakes of the CBM bottleneck and request targeted interventions. In our experimental evaluations on challenging scenarios like handling distribution shifts and confounded training data, we illustrate that CB2Ms are able to successfully generalize interventions to unseen data and can indeed identify wrongly inferred concepts. Overall, our results show that CB2M is a great tool for users to provide interactive feedback on CBMs, e.g., by guiding a user's interaction and requiring fewer interventions.},
Keywords = {Concept Bottleneck, Interventions, Two-Fold Memory, Learning}
}
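The following minimal sketch shows what a single concept intervention on a concept bottleneck looks like: user-corrected concept values overwrite the predicted ones before the label head. The two-fold memory of CB2M is not modeled, and all layer sizes and indices are placeholders.

import torch
import torch.nn as nn

concept_encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 8), nn.Sigmoid())
label_predictor = nn.Linear(8, 10)

x = torch.randn(1, 1, 28, 28)            # dummy input image
concepts = concept_encoder(x)            # predicted concept activations in [0, 1]

# Intervention: a user corrects concepts 2 and 5 to their true values.
corrected = concepts.clone()
corrected[:, [2, 5]] = torch.tensor([1.0, 0.0])

logits = label_predictor(corrected)      # prediction now reflects the intervention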
@inproceedings{braun2024cake,
booktitle = {Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS) },
title={Deep Classifier Mimicry without Data Access},
author={Steven Braun and Martin Mundt and Kristian Kersting},
year={2024},
Keywords={Deep Learning, model-agnostic transfer, Knowledge distillation, Student-Teacher},
Anote={./images/braun2024cake.png},
Key = {Outstanding Student Paper Highlight Award at AISTATS 2024},
Note={Access to pre-trained models has recently emerged as a standard across numerous machine learning domains. Unfortunately, access to the original data the models were trained on may not equally be granted. This makes it tremendously challenging to fine-tune, compress models, adapt continually, or to do any other type of data-driven update. We posit that original data access may however not be required. Specifically, we propose Contrastive Abductive Knowledge Extraction (CAKE), a model-agnostic knowledge distillation procedure that mimics deep classifiers without access to the original data. To this end, CAKE generates pairs of noisy synthetic samples and diffuses them contrastively toward a model’s decision boundary. We empirically corroborate CAKE's effectiveness using several benchmark datasets and various architectural choices, paving the way for broad application.},
Url={https://proceedings.mlr.press/v238/braun24b/braun24b.pdf}
}
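Below is a minimal sketch of the generic teacher-student distillation step that CAKE builds on, using random tensors as stand-ins for the synthetic samples; the contrastive diffusion of samples toward the decision boundary described in the entry above is not shown, and both networks are toy placeholders.

import torch
import torch.nn.functional as F

teacher = torch.nn.Linear(16, 4)   # stand-in for the pre-trained model (no data access)
student = torch.nn.Linear(16, 4)   # model to be trained without the original data

synthetic = torch.randn(32, 16)    # placeholder for generated synthetic samples
with torch.no_grad():
    teacher_logits = teacher(synthetic)
student_logits = student(synthetic)

# Match the student's predictive distribution to the teacher's via a KL loss.
loss = F.kl_div(F.log_softmax(student_logits, dim=-1),
                F.softmax(teacher_logits, dim=-1), reduction="batchmean")
loss.backward()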
@inproceedings{delfosse2024raRL,
booktitle = {Proceedings of the International Conference on Representation Learning (ICLR) },
title={Adaptive Rational Activations to Boost Deep Reinforcement Learning},
author={Quentin Delfosse and Patrick Schramowski and Martin Mundt and Alejandro Molina and Kristian Kersting},
year={2024},
Keywords={Neural Plasticity, Deep Reinforcement Learning, Rational Activations},
Anote={./images/delfosse2024ratRL.png},
Note={Latest insights from biology show that intelligence not only emerges from the connections between neurons, but that individual neurons shoulder more computational responsibility than previously anticipated. Specifically, neural plasticity should be critical in the context of constantly changing reinforcement learning (RL) environments, yet current approaches still primarily employ static activation functions. In this work, we motivate the use of adaptable activation functions in RL and show that rational activation functions are particularly suitable for augmenting plasticity. Inspired by residual networks, we derive a condition under which rational units are closed under residual connections and formulate a naturally regularised version. The proposed joint-rational activation allows for desirable degrees of flexibility, yet regularises plasticity to an extent that avoids overfitting by leveraging a mutual set of activation function parameters across layers. We demonstrate that equipping popular algorithms with (joint) rational activations leads to consistent improvements on different games from the Atari Learning Environment benchmark, notably making DQN competitive to DDQN and Rainbow.},
Url={https://openreview.net/pdf?id=g90ysX1sVs}
}
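For intuition, here is a minimal learnable rational activation f(x) = P(x) / (1 + |Q(x)|) as a PyTorch module. The polynomial degrees and initialization are illustrative, and the joint parameter sharing across layers described above is omitted; this is a sketch, not the paper's implementation.

import torch
import torch.nn as nn


class RationalActivation(nn.Module):
    def __init__(self, num_degree=5, den_degree=4):
        super().__init__()
        self.p = nn.Parameter(torch.randn(num_degree + 1) * 0.1)  # numerator coefficients
        self.q = nn.Parameter(torch.randn(den_degree) * 0.1)      # denominator coefficients

    def forward(self, x):
        powers_p = torch.stack([x ** i for i in range(len(self.p))], dim=-1)
        powers_q = torch.stack([x ** (i + 1) for i in range(len(self.q))], dim=-1)
        numerator = (powers_p * self.p).sum(-1)
        denominator = 1.0 + (powers_q * self.q).sum(-1).abs()     # "safe" denominator
        return numerator / denominator


act = RationalActivation()
y = act(torch.linspace(-3, 3, 10))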
@inproceedings{struppek2024iclr,
booktitle = {Proceedings of the International Conference on Representation Learning (ICLR) },
title={Be Careful What You Smooth For: Label Smoothing Can Be a Privacy Shield but Also a Catalyst for Model Inversion Attacks},
author={Lukas Struppek and Dominik Hintersdorf and Kristian Kersting},
year={2024},
Keywords={Label Smoothing, Privacy, Model Inversion Attacks, Defense},
Anote={./images/struppek2024iclr.png},
Note={Label smoothing – using softened labels instead of hard ones – is a widely adopted regularization method for deep learning, showing diverse benefits such as enhanced generalization and calibration. Its implications for preserving model privacy, however, have remained unexplored. To fill this gap, we investigate the impact of label smoothing on model inversion attacks (MIAs), which aim to generate class-representative samples by exploiting the knowledge encoded in a classifier, thereby inferring sensitive information about its training data. Through extensive analyses, we uncover that traditional label smoothing fosters MIAs, thereby increasing a model's privacy leakage. Even more, we reveal that smoothing with negative factors counters this trend, impeding the extraction of class-related information and leading to privacy preservation, beating state-of-the-art defenses. This establishes a practical and powerful novel way for enhancing model resilience against MIAs.},
Url={https://openreview.net/pdf?id=1SbkubNdbW}
}
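A short sketch of how a smoothing factor reshapes one-hot targets, including the negative factors studied above; the attack and defense evaluation itself is not shown, and the class counts and factor value are arbitrary.

import torch
import torch.nn.functional as F


def smoothed_targets(labels, num_classes, alpha):
    # alpha > 0: classic label smoothing; alpha < 0: "negative" smoothing.
    one_hot = F.one_hot(labels, num_classes).float()
    return one_hot * (1.0 - alpha) + alpha / num_classes


labels = torch.tensor([2, 0])
targets = smoothed_targets(labels, num_classes=5, alpha=-0.05)
logits = torch.randn(2, 5, requires_grad=True)
loss = -(targets * F.log_softmax(logits, dim=-1)).sum(-1).mean()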
@inproceedings{seng2024iclr,
booktitle = {Proceedings of the International Conference on Representation Learning (ICLR) },
title={Learning Large DAGs is Harder than you Think: Many Losses are Minimal for the Wrong DAG},
author={Jonas Seng and Matej Zečević and Devendra Singh Dhami and Kristian Kersting},
year={2024},
Keywords={Structure Learning, DAG, Differentiable, Square-based Losses, Scale},
Anote={./images/seng2024iclr.png},
Note={Structure learning is a crucial task in science, especially in fields such as medicine and biology, where the wrong identification of (in)dependencies among random variables can have significant implications. The primary objective of structure learning is to learn a Directed Acyclic Graph (DAG) that represents the underlying probability distribution of the data. Many prominent DAG learners rely on least square losses or log-likelihood losses for optimization. It is well-known from regression models that least square losses are heavily influenced by the scale of the variables. Recently, it has been demonstrated that the scale of data also affects the performance of structure learning algorithms, though with a strong focus on linear 2-node systems and simulated data. Moving beyond these results, we provide conditions under which square-based losses are minimal for wrong DAGs in d-dimensional cases. Furthermore, we also show that scale can impair the performance of structure learners if relations among variables are non-linear, for both square-based and log-likelihood-based losses. We confirm our theoretical findings through extensive experiments on synthetic and real-world data.},
Url={https://openreview.net/pdf?id=gwbQ2YwLhD}
}
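A tiny two-variable numpy illustration of the scale issue discussed above: simply rescaling one variable changes which regression direction attains the smaller squared residual. This is only a 2-node toy case with invented coefficients; the paper's analysis covers the general higher-dimensional and non-linear settings.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=10_000)
y = 2.0 * x + 0.5 * rng.normal(size=10_000)      # ground-truth direction: X -> Y


def residual_mse(cause, effect):
    # Least-squares fit effect ~ w * cause and return the mean squared residual.
    w = np.dot(cause, effect) / np.dot(cause, cause)
    return np.mean((effect - w * cause) ** 2)


print(residual_mse(x, y), residual_mse(y, x))    # original scale
y_rescaled = y / 10.0                            # rescale Y only
print(residual_mse(x, y_rescaled), residual_mse(y_rescaled, x))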
@inproceedings{wuest2024pix2code,
Anote={./images/wuest_pix2code.png},
title={Pix2Code: Learning to Compose Neural Visual Concepts as Programs},
author={Antonia Wüst and Wolfgang Stammer and Quentin Delfosse and Devendra Singh Dhami and Kristian Kersting},
year={2024},
booktitle={Proceedings of the 40th Conference on Uncertainty in Artificial Intelligence (UAI)},
url={https://arxiv.org/pdf/2402.08280.pdf},
Note = {The challenge in learning abstract concepts from images in an unsupervised fashion lies in the required integration of visual perception and generalizable relational reasoning. Moreover, the unsupervised nature of this task makes it necessary for human users to be able to understand a model’s learnt concepts and potentially revise false behaviours. To tackle both the generalizability and interpretability constraints of visual concept learning, we propose Pix2Code, a framework that extends program synthesis to visual relational reasoning by utilizing the abilities of both explicit, compositional symbolic and implicit neural representations. This is achieved by retrieving object representations from images and synthesizing relational concepts as λ-calculus programs. We evaluate the diverse properties of Pix2Code on the challenging reasoning domains, Kandinsky Patterns and CURI, thereby testing its ability to identify compositional visual concepts that generalize to novel data and concept configurations. Particularly, in stark contrast to neural approaches, we show that Pix2Code’s representations remain human interpretable and can be easily revised for improved performance.},
Keywords = {Concept Learning, Program Synthesis, Neuro-Symbolic, Meta-Learning}
}
@inproceedings{poonia2024chiSPN,
Anote={./images/poonia2024chiSPN.png},
title={chiSPN: Characteristic Interventional Sum-Product Networks for Causal Inference in Hybrid Domains},
author={Harsh Poonia and Moritz Willig and Zhongjie Yu and Matej Zecevic and Kristian Kersting and Devendra Singh Dhami},
year={2024},
booktitle={Proceedings of the 40th Conference on Uncertainty in Artificial Intelligence (UAI)},
url={https://openreview.net/pdf?id=s3kqfH5KBI},
Note = {Causal inference in hybrid domains, characterized by a mixture of discrete and continuous variables, presents a formidable challenge. We take a step in this direction and propose the Characteristic Interventional Sum-Product Network (chiSPN), which is capable of estimating interventional distributions in the presence of random variables drawn from mixed distributions. chiSPN uses characteristic functions in the leaves of an interventional SPN (iSPN), thereby providing a unified view for discrete and continuous random variables through the Fourier–Stieltjes transform of the probability measures. A neural network is used to estimate the parameters of the learned iSPN using the intervened data. Our experiments on 3 synthetic heterogeneous datasets suggest that chiSPN can effectively capture the interventional distributions for both discrete and continuous variables while being expressive and causally adequate. We also show that chiSPN generalizes to multiple interventions while being trained only on single-intervention data.},
Keywords = {Causal Model, Interventional SPN, Hybrid Domain, Fourier-Stieltjes Transform, Neural Network}
}
@inproceedings{brack2024ledits,
Anote = {./images/mbrack_ledits_pp.png},
title={LEDITS++: Limitless Image Editing using Text-to-Image Models},
author={Manuel Brack and Felix Friedrich and Katharina Kornmeier and Linoy Tsaban and Patrick Schramowski and Kristian Kersting and Apolinário Passos},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2024},
Note = {Text-to-image diffusion models have recently received a lot of interest for their astonishing ability to produce high-fidelity images from text only. Subsequent research efforts are aiming to exploit the capabilities of these models and leverage them for intuitive, textual image editing. However, existing methods often require time-consuming fine-tuning and lack native support for performing multiple edits simultaneously. To address these issues, we introduce LEDITS++ , an efficient yet versatile technique for image editing using text-to-image models. LEDITS++ requires no tuning nor optimization, runs in a few diffusion steps, natively supports multiple simultaneous edits, inherently limits changes to relevant image regions, and is architecture agnostic.},
Pages = {},
Keywords = {Image Editing, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, Semantics},
Url={https://openreview.net/pdf?id=bPiTOXLRRQ}
}
@inproceedings{delfosse2024ocatari,
Anote={./images/delfosse2024ocatari.png},
title={OCAtari: Object-Centric Atari 2600 Reinforcement Learning Environments},
author={Quentin Delfosse and Jannis Blüml and Bjarne Gregori and Sebastian Sztwiertnia and Kristian Kersting},
year={2024},
booktitle={Proceedings of the First Conference on Reinforcement Learning (RLC)},
url={https://arxiv.org/pdf/2306.08649},
Note = {Cognitive science and psychology suggest that object-centric representations of complex scenes are a promising step towards enabling efficient abstract reasoning from low-level perceptual features. Yet, most deep reinforcement learning approaches only rely on pixel-based representations that do not capture the compositional properties of natural scenes. For this, we need environments and datasets that allow us to work with and evaluate object-centric approaches. In our work, we extend the Atari Learning Environments, the most-used evaluation framework for deep RL approaches, by introducing OCAtari, which performs resource-efficient extractions of the object-centric states for these games. Our framework allows for object discovery, object representation learning, as well as object-centric RL. We evaluate OCAtari's detection capabilities and resource efficiency.},
Keywords = {Deep Reinforcement Learning, Object-centric Deep Learning, Atari, Arcade Games, RAM Extraction method (REM), Vision Extraction method (VEM)}
}
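As a rough illustration of what an object-centric state can look like, the sketch below reduces each object to a category plus bounding box and flattens the scene into a fixed-size feature vector for a policy. This is a generic data structure for illustration only, not OCAtari's actual interface; all names and numbers are made up.

from collections import namedtuple

GameObject = namedtuple("GameObject", ["category", "x", "y", "w", "h"])


def to_feature_vector(objects, max_objects=8):
    # Flatten object positions and sizes into a fixed-size vector for an RL policy.
    feats = []
    for obj in objects[:max_objects]:
        feats.extend([obj.x, obj.y, obj.w, obj.h])
    feats.extend([0] * (4 * max_objects - len(feats)))   # zero-pad unused slots
    return feats


state = [GameObject("Player", 16, 120, 4, 16), GameObject("Ball", 80, 64, 2, 2)]
features = to_feature_vector(state)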
@inproceedings{kalter2024bilevel,
Anote={./images/kalter2024bilevel.png},
title={Bi-Level One-Shot Architecture Search for Probabilistic Time Series Forecasting},
author={Fabian Kalter and Jonas Seng and Zhongjie Yu and Fabrizio Ventola and Kristian Kersting},
year={2024},
booktitle={Proceedings of the International Conference on Automated Machine Learning (AutoML)},
url={https://openreview.net/pdf?id=AaPhnfFQYn},
Note = {Time series forecasting is ubiquitous in many disciplines. A recent hybrid architecture named predictive Whittle networks (PWNs) tackles this task by employing two distinct modules, a tractable probabilistic model and a neural forecaster, with the former guiding the latter by providing likelihoods about predictions during training. Although PWNs achieve state-of-the-art accuracy, finding the optimal type of probabilistic model and neural forecaster (macro-architecture search) and the architecture of each module (micro-architecture search) of such hybrid models remains difficult and time-consuming. Current one-shot neural architecture search (NAS) methods approach this challenge by focusing on either the micro or the macro aspect, overlooking mutual impact, and could attain the overall optimization only sequentially. To overcome these limitations, we introduce a bi-level one-shot NAS method that optimizes such hybrid architectures simultaneously, leveraging the relationships between the micro and the macro architectural levels. We empirically demonstrate that the hybrid architectures found by our method outperform human-designed and overparameterized ones on various challenging datasets. Furthermore, we unveil insights about underlying connections between architectural choices and temporal features.},
Keywords = {Automated ML, Time Series Forecasting, Neural Architecture Search, Whittle Networks, Bi-Level Program}
}
@inproceedings{paul2024collas,
Anote = {./images/paul2024collas.png},
Author = {Subarnaduti Paul and Lars-Joel Frey and Roshni Ramanna Kamath and Kristian Kersting and Martin Mundt},
booktitle={Proceedings of the Third Conference on Lifelong Learning Agents (CoLLAs)},
title = {Masked Autoencoders are Efficient Continual Federated Learners},
Keywords = {Federated Learning, Masked Autoencoders, Continual Learning},
Pages = {},
Note = {Machine learning is typically framed from a perspective of i.i.d., and more importantly, isolated data. In parts, federated learning lifts this assumption, as it sets out to solve the real-world challenge of collaboratively learning a shared model from data distributed across clients. However, motivated primarily by privacy and computational constraints, the fact that data may change, distributions drift, or even tasks advance individually on clients, is seldom taken into account. The field of continual learning addresses this separate challenge and first steps have recently been taken to leverage synergies in distributed settings of a purely supervised nature. Motivated by these prior works, we posit that such federated continual learning should be grounded in unsupervised learning of representations that are shared across clients; in the loose spirit of how humans can indirectly leverage others' experience without exposure to a specific task. For this purpose, we demonstrate that masked autoencoders for distribution estimation are particularly amenable to this setup. Specifically, their masking strategy can be seamlessly integrated with task attention mechanisms to enable selective knowledge transfer between clients. We empirically corroborate the latter statement through several continual federated scenarios on both image and binary datasets.},
Url = {},
Crossref = {},
Year = {2024}
}
@inproceedings{mathur2024aime,
Anote = {./images/mathur2024aime.png},
Author = {Saurabh Mathur and Veerendra Gadekar and Rashika Ramola and Avery Wang and Ramachandran Thiruvengadam and David Haas and Shinjini Bhatnagar and Nitya Wadhwa and Garbhini Study Group and Predrag Radivojac and Himanshu Sinha and Kristian Kersting and Sriraam Natarajan},
booktitle={Proceedings of the 22nd International Conference on Artificial Intelligence in Medicine (AIME)},
title = {Modeling multiple adverse pregnancy outcomes: Learning from diverse data sources},
Keywords = {Bayesian Networks, Large Language Models, Adverse Pregnancy Outcomes, Preterm Birth, New Hypertension, Preeclampsia},
Pages = {},
Note = {We consider the problem of modeling adverse pregnancy outcomes (APOs) from diverse data sets and aim to understand what is common between them and what is unique for each of these data sets. To this effect, we consider three different data sets (a clinical study from the US, EHRs from a US hospital, and a clinical study in India) and model three specific APOs - preterm birth, new hypertension, and preeclampsia. Since LLMs can efficiently summarize the scientific literature, we use them to generate initial hypotheses and use the different data sets to refine the hypotheses to create joint probabilistic models (as Bayesian networks). Our analyses show that there are eight relationships between risk factors common to all three populations and some unique relationships for specific populations.},
Url = {../../papers/mathur2024aime.pdf},
Crossref = {},
Year = {2024}
}
@inproceedings{moritz2024ratio,
Anote = {./images/moritz2024ratio.png},
Author = {Moritz Willig and Matej Zecevic and Kristian Kersting},
booktitle={Proceedings of the 1st International Conference on Recent Advances in Robust Argumentation Machines (RATIO)},
title = {"Do not disturb my circles!" Identifying the Type of Counterfactual at Hand},
Keywords = {Explanations, Causality, Interventions, Backtracking},
Pages = {},
Note = {When the phenomena of interest are in need of explanation, we are often in search of the underlying root causes. Causal inference provides tools for identifying these root causes---by performing interventions on suitably chosen variables we can observe down-stream effects in the outcome variable of interest. On the other hand, argumentation, as an approach for attributing observed outcomes to specific factors, naturally lends itself as a tool for determining the most plausible explanation. We can further improve the robustness of such explanations by measuring their likelihood within a mutually agreed-upon causal model. For this, one of two in-principle distinct types of counterfactual explanations is typically used: interventional counterfactuals, which treat changes as deliberate interventions to the causal system, and backtracking counterfactuals, which attribute changes exclusively to exogenous factors. Although both frameworks share the common goal of inferring true causal factors, they fundamentally differ in their conception of counterfactuals. Here, we present the first approach that decides when to expect interventional and when to opt for backtracking counterfactuals.},
Url = {},
Crossref = {},
Year = {2024}
}
@incollection{helff2024llavaguard,
Anote={./images/llavaguard_pipe.png},
title={LLAVAGUARD: VLM-based Safeguard for Vision Dataset Curation and Safety Assessment},
author={Lukas Helff and Felix Friedrich and Manuel Brack and Patrick Schramowski and Kristian Kersting},
year={2024},
booktitle={Working Notes of the CVPR 2024 Workshop on Responsible Generative AI (ReGenAI), preprint at arxiv:2406.05113},
url={https://arxiv.org/abs/2406.05113},
Note = {We introduce LlavaGuard, a family of multimodal safeguard models based on Llava, offering a robust framework for evaluating the safety compliance of vision datasets and models. Our models come with a new taxonomy designed for assessing safety risks within visual data. With this safety taxonomy, we have collected and annotated a high-quality dataset to guide Vision-Language Models (VLMs) in safety. We present models in two sizes, namely LlavaGuard-7b and LlavaGuard-13b, both safety-tuned on our novel, annotated dataset to perform policy-based safety assessments of visual content. In this context, LlavaGuard goes beyond binary safety classification by providing information on the violated safety categories, a detailed explanation, and a final assessment. In our evaluations, our models demonstrate state-of-the-art performance with LlavaGuard-13b exhibiting the best results, while the much smaller LlavaGuard-7b model outperforms the much larger Llava-34b baseline. Furthermore, LlavaGuard is designed to allow for customization of the safety taxonomy to align with specific use cases, facilitating zero-shot prompting with individual policies for tailored content moderation.},
Keywords = {AI Safety, Safety Evaluation, Multimodal, Vision Language Model}
}
@misc{tedeschi2024alert,
Anote={./images/tedeschi2024alert.png},
title={ALERT: A Comprehensive Benchmark for Assessing Large Language Models' Safety through Red Teaming},
author={Simone Tedeschi and Felix Friedrich and Patrick Schramowski and Kristian Kersting and Roberto Navigli and Huu Nguyen and Bo Li},
year={2024},
Howpublished={arXiv preprint arXiv:2404.08676},
url={https://arxiv.org/pdf/2404.08676},
Note = {When building Large Language Models (LLMs), it is paramount to bear safety in mind and protect them with guardrails. Indeed, LLMs should never generate content promoting or normalizing harmful, illegal, or unethical behavior that may contribute to harm to individuals or society. This principle applies to both normal and adversarial use. In response, we introduce ALERT, a large-scale benchmark to assess safety based on a novel fine-grained risk taxonomy. It is designed to evaluate the safety of LLMs through red teaming methodologies and consists of more than 45k instructions categorized using our novel taxonomy. By subjecting LLMs to adversarial testing scenarios, ALERT aims to identify vulnerabilities, inform improvements, and enhance the overall safety of the language models. Furthermore, the fine-grained taxonomy enables researchers to perform an in-depth evaluation that also helps one to assess the alignment with various policies. In our experiments, we extensively evaluate 10 popular open- and closed-source LLMs and demonstrate that many of them still struggle to attain reasonable levels of safety.},
Keywords = {Red Teaming, Large Language Model, AI Safety, Benchmark, Evaluation, Risk Taxonomy}
}
@misc{busch2024conconarxiv,
Anote={./images/busch_whereisthetruth.png},
title={Where is the Truth? The Risk of Getting Confounded in a Continual World},
author={Florian Peter Busch and Roshni Kamath and Rupert Mitchell and Wolfgang Stammer and Kristian Kersting and Martin Mundt},
year={2024},
Howpublished={arXiv preprint arXiv:2402.06434},
url={https://arxiv.org/pdf/2402.06434.pdf},
Note = {A dataset is confounded if it is most easily solved via a spurious correlation which fails to generalize to new data. We will show that, in a continual learning setting where confounders may vary in time across tasks, the resulting challenge far exceeds the standard forgetting problem normally considered. In particular, we derive mathematically the effect of such confounders on the space of valid joint solutions to sets of confounded tasks. Interestingly, our theory predicts that for many such continual datasets, spurious correlations are easily ignored when the tasks are trained on jointly, but it is far harder to avoid confounding when they are considered sequentially. We construct such a dataset and demonstrate empirically that standard continual learning methods fail to ignore confounders, while training jointly on all tasks is successful. Our continually confounded dataset, ConCon, is based on CLEVR images and demonstrates the need for continual learning methods with more robust behavior with respect to confounding.},
Keywords = {Continual Learning, Confounders, Dataset}
}
@misc{wuest2024pix2codearxiv,
Anote={./images/wuest_pix2code.png},
title={Pix2Code: Learning to Compose Neural Visual Concepts as Programs},
author={Antonia Wüst and Wolfgang Stammer and Quentin Delfosse and Devendra Singh Dhami and Kristian Kersting},
year={2024},
Howpublished={arXiv preprint arXiv:2402.08280},
url={https://arxiv.org/pdf/2402.08280.pdf},
Note = {The challenge in learning abstract concepts from images in an unsupervised fashion lies in the required integration of visual perception and generalizable relational reasoning. Moreover, the unsupervised nature of this task makes it necessary for human users to be able to understand a model’s learnt concepts and potentially revise false behaviours. To tackle both the generalizability and interpretability constraints of visual concept learning, we propose Pix2Code, a framework that extends program synthesis to visual relational reasoning by utilizing the abilities of both explicit, compositional symbolic and implicit neural representations. This is achieved by retrieving object representations from images and synthesizing relational concepts as λ-calculus programs. We evaluate the diverse properties of Pix2Code on the challenging reasoning domains, Kandinsky Patterns and CURI, thereby testing its ability to identify compositional visual concepts that generalize to novel data and concept configurations. Particularly, in stark contrast to neural approaches, we show that Pix2Code’s representations remain human interpretable and can be easily revised for improved performance.},
Keywords = {Concept Learning, Program Synthesis, Neuro-Symbolic, Meta-Learning}
}
@misc{nakamura2024auroram,
Anote={./images/aurora.png},
title={Aurora-M: The First Open Source Multilingual Language Model Red-teamed according to the U.S. Executive Order},
author={Taishi Nakamura and Mayank Mishra and Simone Tedeschi and Yekun Chai and Jason T Stillerman and Felix Friedrich and Prateek Yadav and Tanmay Laud and Vu Minh Chien and Terry Yue Zhuo and Diganta Misra and Ben Bogin and Xuan-Son Vu and Marzena Karpinska and Arnav Varma Dantuluri and Wojciech Kusa and Tommaso Furlanello and Rio Yokota and Niklas Muennighoff and Suhas Pai and Tosin Adewumi and Veronika Laippala and Xiaozhe Yao and Adalberto Junior and Alpay Ariyak and Aleksandr Drozd and Jordan Clive and Kshitij Gupta and Liangyu Chen and Qi Sun and Ken Tsui and Noah Persaud and Nour Fahmy and Tianlong Chen and Mohit Bansal and Nicolo Monti and Tai Dang and Ziyang Luo and Tien-Tung Bui and Roberto Navigli and Virendra Mehta and Matthew Blumberg and Victor May and Huu Nguyen and Sampo Pyysalo},
year={2024},
Howpublished={arXiv preprint arXiv:2404.00399},
url={https://arxiv.org/pdf/2404.00399.pdf},
Note = {Pretrained language models underpin several AI applications, but their high computational cost for training limits accessibility. Initiatives such as BLOOM and StarCoder aim to democratize access to pretrained models for collaborative community development. However, such existing models face challenges: limited multilingual capabilities, catastrophic forgetting during continual pretraining, the high computational cost of pretraining from scratch, and compliance with AI safety and development laws. This paper presents Aurora-M, a 15B parameter multilingual open-source model trained on English, Finnish, Hindi, Japanese, Vietnamese, and code. Continually pretrained from StarCoderPlus on 435 billion additional tokens, Aurora-M surpasses 2 trillion tokens in total training token count. It is the first open-source multilingual model fine-tuned on human-reviewed safety instructions, thus aligning its development not only with conventional red-teaming considerations, but also with the specific concerns articulated in the Biden-Harris Executive Order on the Safe, Secure, and Trustworthy Development and Use of Artificial Intelligence. Aurora-M is rigorously evaluated across various tasks and languages, demonstrating robustness against catastrophic forgetting and outperforming alternatives in multilingual settings, particularly in safety evaluations. To promote responsible open-source LLM development, Aurora-M and its variants are released publicly.},
Keywords = {Multilingual Model, Safety, Red-teaming, Policy, Multilingual}
}
@misc{delfosse2024interpretablearxiv,
Anote = {./images/delfosse2024interpretable.png},
title={Interpretable concept bottlenecks to align reinforcement learning agents},
author={Quentin Delfosse and Sebastian Sztwiertnia and Wolfgang Stammer and Mark Rothermel and Kristian Kersting},
Howpublished = {arXiv preprint arXiv:2401.05821},
year = {2024},
Url = {https://arxiv.org/pdf/2401.05821v2.pdf},
Pages = {},
Note = {Goal misalignment, reward sparsity and difficult credit assignment are only a few of the many issues that make it difficult for deep reinforcement learning (RL) agents to learn optimal policies. Unfortunately, the black-box nature of deep neural networks impedes the inclusion of domain experts for inspecting the model and revising suboptimal policies. To this end, we introduce *Successive Concept Bottleneck Agents* (SCoBots), that integrate consecutive concept bottleneck (CB) layers. In contrast to current CB models, SCoBots do not just represent concepts as properties of individual objects, but also as relations between objects which is crucial for many RL tasks. Our experimental results provide evidence of SCoBots' competitive performances, but also of their potential for domain experts to understand and regularize their behavior. Among other things, SCoBots enabled us to identify a previously unknown misalignment problem in the iconic video game, Pong, and resolve it. Overall, SCoBots thus result in more human-aligned RL agents.},
Keywords = {Reinforcement Learning, Transparent agents, Interpretability, Concept Bottlenecks}
}
@inproceedings{keshmirian2024cogsci,
Anote = {./images/keshmirian2024realign.png},
Author = {Anita Keshmirian and Moritz Willig and Babak Hemmatian and Ulrike Hahn and Kristian Kersting and Tobias Gerstenberg},
booktitle={Proceedings of the 46th Annual Meeting of the Cognitive Science Society (CogSci)},
Keywords = {Causal Cognition, Mechanistic Reasoning, Large Language Models, Causal Chain, Bias in Causal Judgment, Common Cause, Bayesian networks, Causal argumentation},
Pages = {},
Note = {Causal reasoning is a critical aspect of both human cognition and artificial intelligence (AI), playing a prominent role in understanding the relationships between events. Causal Bayesian Networks (CBNs) have been instrumental in modeling such relationships, using directed, acyclic links between nodes in a network to depict probabilistic associations between variables. Deviations from these graphical models’ edicts would result in biased judgments. This study explores one such bias in the causal judgments of humans and Large Language Models (LLMs) by examining two structures in CBNs: Canonical Chain (A→B→C) and Common Cause (A←B→C) networks. In these structures, once the intermediate variable (B) is known, the probability of the outcome (C) is normatively independent of the initial cause (A). However, studies have shown that humans often ignore this independence. We tested the mutually exclusive predictions of three theories that could account for this bias (N=300). Using hierarchical mixed-effect models, we found that humans tend to perceive causes in Chain structures as significantly stronger, providing support for only one of the hypotheses. This increase in perceived causal power might reflect a view of intermediate causes as more reflective of reliable mechanisms, which could, in turn, stem from our interactions with the world or the way we communicate causality to others. LLMs are primarily trained on language data. Therefore, examining whether they exhibit similar biases in causal reasoning can help us understand the origins of canonical Chain structures’ perceived causal power while also shedding light on whether LLMs can abstract causal principles. To investigate this, we subjected three LLMs, GPT3.5-Turbo, GPT4, and Luminous Supreme Control, to the same queries as our human subjects, adjusting a key ‘temperature’ hyperparameter. Our findings show that, particularly with higher temperatures (i.e., greater randomness), LLMs exhibit a similar boost in the perceived causal power of Chains, suggesting the bias is at least partly reflected in language use. Similar results across items suggest a degree of causal principle abstraction in the studied models. Implications for causal representation in humans and LLMs are discussed.},
Title = {Biased Causal Strength Judgments in Humans and Large Language Models},
Url = {},
Crossref = {},
Year = {2024}
}
@inproceedings{deiseroth2024dtm,
Anote = {./images/deiseroth2024dtm.png},
booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics (NAACL 2024) },
title={Divergent Token Metrics: Measuring degradation to prune away LLM components – and optimize quantization},
author={Björn Deiseroth and Max Meuer and Nikolas Gritsch and Constantin Eichenberg and Patrick Schramowski and Matthias Aßenmacher and Kristian Kersting},
Note = {Large Language Models (LLMs) have reshaped natural language processing with their impressive capabilities. Their ever-increasing size, however, has raised concerns about their effective deployment and the need for LLM compression. This study introduces the Divergent Token Metrics (DTMs), a novel approach for assessing compressed LLMs, addressing the limitations of traditional perplexity or accuracy measures that fail to accurately reflect text generation quality. DTMs focus on token divergence, which allows deeper insights into the subtleties of model compression, in particular when evaluating components' impacts individually. Utilizing the First Divergent Token Metric (FDTM) in model sparsification reveals that 25% of all attention components can be pruned beyond 90% on the Llama-2 model family, still keeping SOTA performance. For quantization, FDTM suggests that over 80% of parameters can naively be transformed to int8 without special outlier management. These evaluations indicate the necessity of choosing appropriate compressions for parameters individually---and that FDTM can identify those---while standard metrics result in deteriorated outcomes.},
year={2024},
Pages = {},
Keywords = {Quantization, Model Analysis, Interpretability, Low Compute Setting, Efficiency, Deep Learning},
Url={https://arxiv.org/pdf/2311.01544}
}
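A minimal sketch of the idea behind a first-divergence comparison: greedily decode the same prompt with the original and the compressed model and report the first position where the token sequences disagree. The paper's exact metric definition and evaluation setup may differ; the token IDs below are arbitrary.

def first_divergent_index(tokens_original, tokens_compressed):
    # Return the index of the first token where the two decodes disagree.
    for i, (a, b) in enumerate(zip(tokens_original, tokens_compressed)):
        if a != b:
            return i
    return min(len(tokens_original), len(tokens_compressed))


reference = [101, 7592, 2088, 1012, 102]
compressed = [101, 7592, 2088, 999, 102]
print(first_divergent_index(reference, compressed))   # -> 3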
@inproceedings{kohaut2024icuas,
Anote = {./images/kohaut2024icuas.png},
Author = {Simon Kohaut and Benedict Flade and Devendra Singh Dhami and Julian Eggert and Kristian Kersting},
booktitle={Proceedings of the 2024 International Conference on Unmanned Aircraft Systems (ICUAS)},
title={Towards Probabilistic Clearance, Explanation and Optimization},
Keywords = {Mission Design, Probabilistic Inference, Logic},
Pages = {},
Note = {The employment of Unmanned Aerial Systems (UAS) beyond visual line of sight (BVLOS) is both an appealing and challenging task. While UAS have the potential to greatly enhance today's logistics and emergency response capabilities, unmanned flying objects above the heads of unprotected pedestrians induce similarly great safety risks. In this work, we make strides towards improved safety in the application of UAS by introducing clearance, explanation and optimization strategies for UAS missions grounded in a probabilistic logic framework. Our approach encapsulates critical domain knowledge, legal requirements and safety assertions in Hybrid Probabilistic Logic Programs (HPLP), meaning we encode the agent's navigation space in a mix of discrete and continuous distributions over spatial relations. As a key contribution, we formalize how safe and legal trajectories are planned, and remote pilots informed, within this Probabilistic Mission (ProMis) framework. Based on real crowd-sourced map data and a synthetic scenario, we demonstrate the application and utility of our methods in UAS navigation.},
Url = {https://www.aiml.informatik.tu-darmstadt.de/papers/kohaut2024ceo.pdf},
Crossref = {},
Year = {2024}
}
@inproceedings{zipperling24collafuse,
author = {Domenique Zipperling and Simeon Allmendinger and Lukas Struppek and Niklas Kühl},
title = {CollaFuse: Navigating Limited Resources and Privacy in Collaborative Generative AI},
year = {2024},
booktitle = {European Conference on Information Systems (ECIS)},
Url = {https://arxiv.org/abs/2402.19105},
Note = {In the landscape of generative artificial intelligence, diffusion-based models present challenges for socio-technical systems in data requirements and privacy. Traditional approaches like federated learning distribute the learning process but strain individual clients, especially with constrained resources (e.g., edge devices). In response to these challenges, we introduce CollaFuse, a novel framework inspired by split learning. Tailored for efficient and collaborative use of denoising diffusion probabilistic models, CollaFuse enables shared server training and inference, alleviating client computational burdens. This is achieved by retaining data and computationally inexpensive GPU processes locally at each client while outsourcing the computationally expensive processes to the shared server. Demonstrated in a healthcare context, CollaFuse enhances privacy by highly reducing the need for sensitive information sharing. These capabilities hold the potential to impact various application areas, such as the design of edge computing solutions, healthcare research, or autonomous driving. In essence, our work advances distributed machine learning, shaping the future of collaborative GenAI networks.},
Anote={./images/struppek_collafuse.png},
Keywords = {Collaborative Learning, Split Learning, Diffusion Models}
}
@misc{helfenstein2024checkmating,
Anote={./images/helfenstein2024checkmating.png},
author = {Felix Helfenstein and Jannis Blüml and Johannes Czech and Kristian Kersting},
title = {Checkmating One, by Using Many: Combining Mixture of Experts with MCTS to Improve in Chess},
Howpublished = {arXiv preprint arXiv:2401.16852},
year = {2024},
Url = {https://arxiv.org/abs/2401.16852},
Pages = {},
Crossref = {https://github.com/HelpstoneX/CrazyAra},
Note = {This paper presents a new approach that integrates deep learning with computational chess, using both the Mixture of Experts (MoE) method and Monte-Carlo Tree Search (MCTS). Our methodology employs a suite of specialized models, each designed to respond to specific changes in the game's input data. This results in a framework with sparsely activated models, which provides significant computational benefits. Our framework combines the MoE method with MCTS, in order to align it with the strategic phases of chess, thus departing from the conventional ``one-for-all'' model. Instead, we utilize distinct game phase definitions to effectively distribute computational tasks across multiple expert neural networks. Our empirical research shows a substantial improvement in playing strength, surpassing the traditional single-model framework. This validates the efficacy of our integrated approach and highlights the potential of incorporating expert knowledge and strategic principles into neural network design. The fusion of MoE and MCTS offers a promising avenue for advancing machine learning architectures.},
Keywords = {Mixture of Experts, Game Phases, Chess, Monte-Carlo Tree Search, AlphaZero}
}
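A toy sketch of the phase-based expert dispatch described above: the position's piece count selects which expert evaluates it. The phase thresholds, the expert stand-ins, and the board encoding are all invented placeholders, not the paper's actual networks or phase definitions.

def game_phase(num_pieces):
    # Hypothetical phase boundaries based on the number of pieces on the board.
    if num_pieces > 26:
        return "opening"
    if num_pieces > 10:
        return "middlegame"
    return "endgame"


experts = {"opening": lambda board: 0.1,      # stand-ins for phase-specific
           "middlegame": lambda board: 0.0,   # policy/value networks
           "endgame": lambda board: -0.2}


def evaluate(board, num_pieces):
    # Route the position to the expert responsible for its game phase.
    return experts[game_phase(num_pieces)](board)


value = evaluate(board="<placeholder position>", num_pieces=14)  # middlegame expert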
@incollection{struppek2024adversarialllm,
title={Exploring the Adversarial Capabilities of Large Language Models},
author={Lukas Struppek and Minh Hieu Le and Dominik Hintersdorf and Kristian Kersting},
year={2024},
Url = {https://arxiv.org/pdf/2402.09132.pdf},
Pages = {},
booktitle={ICLR 2024 Workshop on Secure and Trustworthy Large Language Models (SeT LLM)},
Note = {The proliferation of large language models (LLMs) has sparked widespread and general interest due to their strong language generation capabilities, offering great potential for both industry and research. While previous research delved into the security and privacy issues of LLMs, the extent to which these models can exhibit adversarial behavior remains largely unexplored. Addressing this gap, we investigate whether common publicly available LLMs have inherent capabilities to perturb text samples to fool safety measures, so-called adversarial examples resp. attacks. More specifically, we investigate whether LLMs are inherently able to craft adversarial examples out of benign samples to fool existing safe rails. Our experiments, which focus on hate speech detection, reveal that LLMs succeed in finding adversarial perturbations, effectively undermining hate speech detection systems. Our findings carry significant implications for (semi-)autonomous systems relying on LLMs, highlighting potential challenges in their interaction with existing systems and safety measures.},
Anote = {./images/struppek_adv_llm.png},
Keywords = {Large Language Models, Adversarial Examples}
}
@incollection{struppek2024homoglyphs,
Anote = {./images/struppek2023jair.png},
Author = {Lukas Struppek and Dominik Hintersdorf and Felix Friedrich and Manuel Brack and Patrick Schramowski and Kristian Kersting},
booktitle={ICLR 2024 Workshop on Navigating and Addressing Data Problems for Foundation Models (DPFM)},
Keywords = {Generative AI, Text-guided image generation, Text-to-image synthesis, Multimodal Systems, Cultural
biases},
Pages = {},
Note = {Models for text-to-image synthesis have recently drawn a lot of interest. They are capable of producing high-quality images that depict a variety of concepts and styles when conditioned on textual descriptions. However, these models adopt cultural characteristics associated with specific Unicode scripts from their vast amount of training data, which may not be immediately apparent. We show that by simply inserting single non-Latin characters in the textual description, common models reflect cultural biases in their generated images. We analyze this behavior both qualitatively and quantitatively, and identify a model's text encoder as the root cause of the phenomenon. Such behavior can be interpreted as a model feature, offering users a simple way to customize the image generation and reflect their own cultural background. Yet, malicious users or service providers may also try to intentionally bias the image generation. One goal might be to create racist stereotypes by replacing Latin characters with similarly-looking characters from non-Latin scripts, so-called homoglyphs.},
Title = {Exploiting Cultural Biases via Homoglyphs in Text-to-Image Synthesis},
Url = {https://openreview.net/pdf?id=VeCTgo5f9q},
Key = {Best Paper Award at DPFM 2024},
Crossref = {},
Year = {2024}
}
@incollection{keshmirian2024realign,
Anote = {./images/keshmirian2024realign.png},
Author = {Anita Keshmirian and Moritz Willig and Babak Hemmatian and Ulrike Hahn and Kristian Kersting and Tobias Gerstenberg},
booktitle={Working Notes of the ICLR 2024 Workshop on Representational Alignment (Re-Align)},
Keywords = {Causal Cognition, Mechanistic Reasoning, Large Language Models, Causal Chain, Bias in Causal Judgment, Common Cause, Bayesian networks, Causal argumentation},
Pages = {},
Note = {Causal reasoning is a critical aspect of both human cognition and artificial intelligence (AI), playing a prominent role in understanding the relationships between events. Causal Bayesian Networks (CBNs) have been instrumental in modeling such relationships, using directed, acyclic links between nodes in a network to depict probabilistic associations between variables. Deviations from these graphical models’ edicts would result in biased judgments. This study explores one such bias in the causal judgments of humans and Large Language Models (LLMs) by examining two structures in CBNs: Canonical Chain (A→B→C) and Common Cause (A←B→C) networks. In these structures, once the intermediate variable (B) is known, the probability of the outcome (C) is normatively independent of the initial cause (A). However, studies have shown that humans often ignore this independence. We tested the mutually exclusive predictions of three theories that could account for this bias (N=300). Using hierarchical mixed-effect models, we found that humans tend to perceive causes in Chain structures as significantly stronger, providing support for only one of the hypotheses. This increase in perceived causal power might reflect a view of intermediate causes as more reflective of reliable mechanisms, which could, in turn, stem from our interactions with the world or the way we communicate causality to others. LLMs are primarily trained on language data. Therefore, examining whether they exhibit similar biases in causal reasoning can help us understand the origins of canonical Chain structures’ perceived causal power while also shedding light on whether LLMs can abstract causal principles. To investigate this, we subjected three LLMs, GPT3.5-Turbo, GPT4, and Luminous Supreme Control, to the same queries as our human subjects, adjusting a key ‘temperature’ hyperparameter. Our findings show that, particularly with higher temperatures (i.e., greater randomness), LLMs exhibit a similar boost in the perceived causal power of Chains, suggesting the bias is at least partly reflected in language use. Similar results across items suggest a degree of causal principle abstraction in the studied models. Implications for causal representation in humans and LLMs are discussed.},
Title = {Biased Causal Strength Judgments in Humans and Large Language Models},
Url = {https://openreview.net/pdf?id=544P6YidFk},
Crossref = {},
Year = {2024}
}
@misc{derstroff2024amplifying,
title={Amplifying Exploration in Monte-Carlo Tree Search by Focusing on the Unknown},
author={Cedric Derstroff and Jannis Brugger and Jannis Blüml and Mira Mezini and Stefan Kramer and Kristian Kersting},
year={2024},
Howpublished = {arXiv preprint 2402.08511},
Note = {Monte-Carlo tree search (MCTS) is an effective anytime algorithm with a vast amount of applications. It strategically allocates computational resources to focus on promising segments of the search tree, making it a very attractive search algorithm in large search spaces. However, it often expends its limited resources on reevaluating previously explored regions when they remain the most promising path. Our proposed methodology, denoted as AmEx-MCTS, solves this problem by introducing a novel MCTS formulation. Central to AmEx-MCTS is the decoupling of value updates, visit count updates, and the selected path during the tree search, thereby enabling the exclusion of already explored subtrees or leaves. This segregation preserves the utility of visit counts for both exploration-exploitation balancing and quality metrics within MCTS. The resultant augmentation facilitates a considerably broader search using identical computational resources, preserving the essential characteristics of MCTS. The expanded coverage not only yields more precise estimations but also proves instrumental in larger and more complex problems. Our empirical evaluation demonstrates the superior performance of AmEx-MCTS, surpassing classical MCTS and related approaches by a substantial margin.},
Anote = {./images/AmEx.png},
Keywords = {Monte-Carlo Tree Search, Exploration vs Exploitation, Upper Confidence Bounds for Trees}
}
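For reference, here is the standard UCT child-selection step that AmEx-MCTS builds on; the decoupled value/visit bookkeeping described above is not implemented here, and the node representation is a simplified placeholder.

import math


def uct_select(children, exploration_c=1.414):
    # children: list of dicts with accumulated "value" and "visits";
    # the parent's total visit count drives the exploration bonus.
    total_visits = sum(c["visits"] for c in children) + 1

    def score(c):
        if c["visits"] == 0:
            return float("inf")           # always try unvisited children first
        exploit = c["value"] / c["visits"]
        explore = exploration_c * math.sqrt(math.log(total_visits) / c["visits"])
        return exploit + explore

    return max(children, key=score)


children = [{"value": 3.0, "visits": 5}, {"value": 1.0, "visits": 1}, {"value": 0.0, "visits": 0}]
best = uct_select(children)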
@inproceedings{derstroff2023peer,
title={Peer Learning: Learning Complex Policies in Groups from Scratch via Action Recommendations},
volume={38},
url={https://ojs.aaai.org/index.php/AAAI/article/view/29061},
DOI={10.1609/aaai.v38i10.29061},
Note={Peer learning is a novel high-level reinforcement learning framework for agents learning in groups. While standard reinforcement learning trains an individual agent in trial-and-error fashion, all on its own, peer learning addresses a related setting in which a group of agents, i.e., peers, learns to master a task simultaneously together from scratch. Peers are allowed to communicate only about their own states and actions recommended by others: "What would you do in my situation?". Our motivation is to study the learning behavior of these agents. We formalize the teacher selection process in the action advice setting as a multi-armed bandit problem and therefore highlight the need for exploration. Eventually, we analyze the learning behavior of the peers and observe their ability to rank the agents' performance within the study group and understand which agents give reliable advice. Further, we compare peer learning with single agent learning and a state-of-the-art action advice baseline. We show that peer learning is able to outperform single-agent learning and the baseline in several challenging discrete and continuous OpenAI Gym domains. Doing so, we also show that within such a framework complex policies from action recommendations beyond discrete action spaces can evolve.},
number={10},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
author={Cedric Derstroff and Mattia Cerrato and Jannis Brugger and Jan Peters and Stefan Kramer},
year={2024}, month={Mar.}, pages={11766-11774},
Anote = {./images/peerlearning_paper.png},
Keywords = {Reinforcement Learning, Imitation Learning & Inverse Reinforcement Learning, Adversarial Agents, Agent Communication, Multiagent Learning},
}
@misc{friedrich2024multilingual,
Anote={./images/magbig.png},
author = {Felix Friedrich and Katharina Hämmerl and Patrick Schramowski and Manuel Brack and Jindrich Libovicky and Kristian Kersting and Alexander Fraser},
title={Multilingual Text-to-Image Generation Magnifies Gender Stereotypes and Prompt Engineering May Not Help You},
Howpublished = {arXiv preprint arXiv:2401.16092},
year = {2024},
Url = {https://arxiv.org/pdf/2401.16092},
Pages = {},
Note = {Text-to-image generation models have recently achieved astonishing results in image quality, flexibility, and text alignment and are consequently employed in a fast-growing number of applications. Through improvements in multilingual abilities, a larger community now has access to this kind of technology. Yet, as we will show, multilingual models suffer similarly from (gender) biases as monolingual models. Furthermore, the natural expectation is that these models will provide similar results across languages, but this is not the case and there are important differences between languages. Thus, we propose a novel benchmark MAGBIG intending to foster research in multilingual models without gender bias. We investigate whether multilingual T2I models magnify gender bias with MAGBIG. To this end, we use multilingual prompts requesting portrait images of persons of a certain occupation or trait (using adjectives). Our results show not only that models deviate from the normative assumption that each gender should be equally likely to be generated, but that there are also big differences across languages. Furthermore, we investigate prompt engineering strategies, i.e. the use of indirect, neutral formulations, as a possible remedy for these biases. Unfortunately, they help only to a limited extent and result in worse text-to-image alignment. Consequently, this work calls for more research into diverse representations across languages in image generators.},
Keywords = {AI Ethics, Generative AI, Text-to-Image Models}
}
@article{thomas2024decisions,
Anote = {./images/thomas2023decisions.png},
title = {Modeling dataset bias in machine-learned theories of economic decision making},
author={Tobias Thomas and Dominik Straub and Fabian Tatai and Megan Shene and Tümer Tosik and Kristian Kersting and Constantin Rothkopf},
Journal = {Nature Human Behaviour},
Note = {Normative and descriptive models have long vied for explaining and predicting human risky choices, such as those between goods or gambles. A recent study (Peterson et al., 2021, Science) reports the discovery of a new, more accurate model of human decision-making by training neural networks on a new online large-scale dataset, choices13k. Here, we systematically analyze the relationships between several models and datasets using machine learning methods and find evidence for dataset bias. Because participants’ choices in stochastically dominated gambles were consistently skewed towards equipreference in the choices13k dataset, we hypothesized that this reflected increased decision noise. Indeed, a probabilistic generative model adding structured decision noise to a neural network trained on data from a laboratory study transferred best, i.e. outperformed all models apart from those trained on choices13k. We conclude that a careful combination of theory and data analysis is still required to understand the complex interactions of machine learning models and data of human risky choices.},
Keywords = {choices13k, economic decisions, deep learning, data-driven, no free lunch, model-driven, computational cognitive science, no end of theory},
Publisher = {Nature Publishing Group},
url={../../papers/thomas2024decisions.pdf},
year={2024},
volume={},
pages={},
issn={},
doi={}
}
@article{hintersdorf2024clip_privacy,
Anote = {./images/hintersdorf2022clipping_privacy.png},
title = {Does CLIP Know My Face?},
author={Dominik Hintersdorf and Lukas Struppek and Manuel Brack and Felix Friedrich and Patrick Schramowski and Kristian Kersting},
Journal = {Journal of Artificial Intelligence Research (JAIR)},
Note = {With the rise of deep learning in various applications, privacy concerns around the protection of training data have become a critical area of research. Whereas prior studies have focused on privacy risks in single-modal models, we introduce a novel method to assess privacy for multi-modal models, specifically vision-language models like CLIP. The proposed Identity Inference Attack (IDIA) reveals whether an individual was included in the training data by querying the model with images of the same person. By letting the model choose from a wide variety of possible text labels, we can determine whether it recognizes the person and, therefore, whether the person was part of the training data. Our large-scale experiments on CLIP demonstrate that individuals used for training can be identified with very high accuracy. We confirm that the model has learned to associate names with depicted individuals, implying the existence of sensitive information that can be extracted by adversaries. Our results highlight the need for stronger privacy protection in large-scale models and suggest that IDIAs can be used to prove the unauthorized use of data for training and to enforce privacy laws.},
Keywords = {Identity Inference Attacks, Privacy, Computer Vision, Pre-trained models, CLIP, Deep Learning},
Publisher = {},
url={https://arxiv.org/pdf/2209.07341.pdf},
year={2024},
volume={},
pages={},
issn={},
doi={}
}
@article{otto2024mlst,
Anote={./images/otto2024mlst.png},
author={Kevin Otto and Simon Burgis and Kristian Kersting and Reinhold Bertrand and Devendra Singh Dhami},
Journal = {Machine Learning: Science and Technology (MLST)},
note = {The number of satellites in orbit around Earth is increasing rapidly, with the risk of collision rising accordingly. Trends of the global population of satellites
need to be analyzed to test the viability and impact of proposed rules and laws affecting the satellite population and collision avoidance strategies. This requires
large scale simulations of satellites that are propagated on long timescales to compute the large amounts of actionable close encounters (called conjunctions), which could
lead to collisions. Rigorously checking for conjunctions by computing future states of orbits is computationally expensive due to the large amount of objects involved and
conjunction filters are thus used to remove non-conjuncting orbit pairs from the list of possible conjunctions. In this work, we explore the possibility of machine learning
(ML) based conjunction filters using several algorithms such as eXtreme Gradient Boosting, TabNet and (physics-informed) neural networks and deep operator networks.
To show the viability and the potential of ML based filters, these algorithms are trained to predict the future state of orbits. For the physics-informed approaches,
multiple partial differential equations are set up using the Kepler equation as a basis. The empirical results demonstrate that physics-informed deep operator networks are
capable of predicting the future state of orbits using these equations (RMSE: 0.136) and outperform eXtreme Gradient Boosting (RMSE: 0.568) and TabNet (RMSE: 0.459).
We also propose a filter based on the trained deep operator network, which is shown to outperform the filter capability of the commonly used perigee-apogee test and the
orbit path filter on a synthetic dataset, while being on average 3.2 times faster to compute than a rigorous conjunction check.},
title={Machine Learning meets Kepler: Inverting Kepler’s Equation for All vs All Conjunction Analysis},
Publisher = {IOP Publishing},
year={2024},
Crossref = {},
url={},
keywords={Gradient Boosting, TabNet, Physics-Informed Neural Networks, Satellite Conjunctions, Kepler Equation}
}
@article{zecevic2024acml,
Anote={./images/zecevic2023acml.png},
author={Matej Zecevic and Devendra Singh Dhami and Kristian Kersting},
note = {Recent years have been marked by extended research on adversarial attacks, especially on deep neural networks. With this work, we intend to pose and investigate the question of whether the phenomenon might be more general in nature, that is, whether adversarial-style attacks occur outside classical classification tasks. Specifically, we investigate optimization problems, as they constitute a fundamental part of modern AI research. To this end, we consider the base class of optimizers, namely Linear Programs (LPs). On our initial attempt at a naïve mapping between the formalism of adversarial examples and LPs, we quickly identify the key ingredients missing for making sense of a reasonable notion of adversarial examples for LPs. Intriguingly, the formalism of Pearl's notion of causality allows for the right description of adversarial-like examples for LPs. Characteristically, we show the direct influence of the Structural Causal Model (SCM) on the subsequent LP optimization, which ultimately exposes a notion of confounding in LPs (inherited by said SCM) that allows for adversarial-style attacks. We provide the general proof formally, alongside existential proofs of such intriguing LP-parameterizations based on SCMs, for three combinatorial problems, namely Linear Assignment, Shortest Path, and a real-world problem from energy systems.},
journal={Machine Learning Journal (MLJ)},
title={Structural Causal Models Reveal Confounder Bias in Linear Program Modelling},
Publisher = {Springer},
year={2024},
Crossref = {},
url={../../papers/zecevic2024acml.pdf},
keywords={Adversarial Attack, Linear Programs, Causal Link, Structural Causal Model}
}
@article{sha2024nai,
Anote = {./images/sha2023nesypi.png},
title = {Neuro-Symbolic Predicate Invention: Learning Relational Concepts from Visual Scenes},
author = {Jingyuan Sha and Hikaru Shindo and Kristian Kersting and Devendra Singh Dhami},
Journal = {Neurosymbolic Artificial Intelligence Journal (NAIJ)},
Note = {The predicates used for Inductive Logic Programming (ILP) systems are usually elusive and need to be hand-crafted in advance, which limits the generalization of the system when learning new rules without sufficient background knowledge. Predicate Invention (PI) for ILP is the problem of discovering new concepts that describe hidden relationships in the domain. PI can mitigate the generalization problem for ILP by inferring new concepts, giving the system a better vocabulary to compose logic rules. Although there are several PI approaches for symbolic ILP systems, PI for NeSy ILP systems that can handle visual input to learn logical rules using differentiable reasoning is relatively unaddressed. To this end, we propose a neuro-symbolic approach, NeSy-𝜋 (𝜋 denotes the abbreviation of Predicate Invention), to invent predicates from visual scenes for NeSy ILP systems based on clustering and extension of relational concepts. NeSy-𝜋 processes visual scenes as input using deep neural networks for visual perception and invents new concepts that support the task of classifying complex visual scenes. The invented concepts can be used by any NeSy ILP system instead of hand-crafted background knowledge. Our experiments show that the PI model is capable of inventing high-level concepts and solving complex visual logic patterns more efficiently and accurately in the absence of explicit background knowledge. Moreover, the invented concepts are explainable and interpretable, while also providing competitive results with state-of-the-art NeSy ILP systems based on given knowledge.},
Keywords = {Differentiable Reasoning, Inductive Logic Programming, Neuro-Symbolic AI, Object-centric Learning},
Publisher = {IOS Press},
url={https://neurosymbolic-ai-journal.com/system/files/nai-paper-712.pdf},
year={2024},
volume={712-1692},
pages={},
issn={},
doi={}
}
@article{ochs2024remote,
Anote = {./images/ochs2024remote.png},
title = {Effective Risk Detection for Natural Gas Pipelines using Low Resolution Satellite Images},
author={Daniel Ochs and Karsten Wiertz and Sebastian Bußmann and Kristian
Kersting and Devendra Singh Dhami},
Journal = {Remote Sensing},
Note = {Natural gas pipelines represent a critical infrastructure for most countries and thus their
safety is of paramount importance. To report potential risks along pipelines, several steps are taken
such as manual inspection and helicopter flights; however, these solutions are expensive and the
flights are environmentally unfriendly. Deep learning has demonstrated considerable potential in
handling a number of tasks in recent years as models rely on huge datasets to learn a specific task.
With the increasing number of satellites orbiting the Earth, remote sensing data have become widely
available, thus paving the way for automated pipeline monitoring via deep learning. This can result
in effective risk detection, thereby reducing monitoring costs while being more precise and accurate.
A major hindrance here is the low resolution of images obtained from the satellites, which makes
it difficult to detect smaller changes. To this end, we propose to use transformers trained with
low-resolution images in a change detection setting to detect pipeline risks. We collect PlanetScope
satellite imagery (3 m resolution) that captures certain risks associated with the pipelines and present
how we collected the data. Furthermore, we compare various state-of-the-art models, among which
ChangeFormer, a transformer architecture for change detection, achieves the best performance with a
70% F1 score. As part of our evaluation, we discuss the specific performance requirements in pipeline
monitoring and show how the model’s predictions can be shifted accordingly during training.},
Keywords = {Remote Sensing, Satellite Images, Change Transformer, Change Detection, Pipeline Monitoring},
Publisher = {MDPI},
year={2024},
volume={16},
pages={},
issn={2072-4292},
doi={10.3390/rs16020266},
url={https://www.mdpi.com/2072-4292/16/2/266}
}
@incollection{shindo2024deisam,
Anote={./images/shindo2024deisam.png},
author = {Hikaru Shindo and Manuel Brack and Gopika Sudhakaran and Devendra Singh Dhami and Patrick Schramowski and Kristian Kersting},
title = {DeiSAM: Segment Anything with Deictic Prompting},
year = {2024},
Url = {https://arxiv.org/abs/2402.14123},
Pages = {},
booktitle={AAAI 2024 Workshop on Neuro-Symbolic Learning and Reasoning
in the Era of Large Language Models (NucLeaR)},
Note = {Large-scale, pre-trained neural networks have demonstrated strong capabilities in various tasks, including zero-shot image segmentation. To identify concrete objects in complex scenes, humans instinctively rely on deictic descriptions in natural language, i.e., referring to something depending on the context, e.g., "The object that is on the desk and behind the cup." However, deep learning approaches cannot reliably interpret these deictic representations due to their lack of reasoning capabilities in complex scenarios. To remedy this issue, we propose DeiSAM, which integrates large pre-trained neural networks with differentiable logic reasoners. Given a complex, textual segmentation description, DeiSAM leverages Large Language Models (LLMs) to generate first-order logic rules and performs differentiable forward reasoning on generated scene graphs. Subsequently, DeiSAM segments objects by matching them to the logically inferred image regions. As part of our evaluation, we propose the Deictic Visual Genome (DeiVG) dataset, containing paired visual input and complex, deictic textual prompts. Our empirical results demonstrate that DeiSAM is a substantial improvement over data-driven neural baselines on deictic segmentation tasks.},
Keywords = {Neuro-Symbolic AI, Differentiable Reasoning, Segmentation, Textual Grounding}
}
@incollection{mathur2024dai,
Anote={./images/mathur2024dai.png},
author = {Saurabh Mathur and Sahil Sidheekh and Pranuthi Tenali and Eric Blasch and Kristian Kersting and Sriraam Natarajan},
title = {Credibility-aware Reliable Multi-Modal Fusion Using Probabilistic Circuits},