<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd">
<!--<?xml-stylesheet type="text/xsl" href="article.xsl"?>-->
<article article-type="research-article" dtd-version="1.2" xml:lang="en"
    xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <front>
        <journal-meta>
            <journal-id journal-id-type="issn">1683-1470</journal-id>
            <journal-title-group>
                <journal-title>Data Science Journal</journal-title>
            </journal-title-group>
            <issn pub-type="epub">1683-1470</issn>
            <publisher>
                <publisher-name>Ubiquity Press</publisher-name>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.5334/dsj-2021-008</article-id>
            <article-categories>
                <subj-group>
                    <subject>Research paper</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Kadi4Mat: A Research Data Infrastructure for Materials
                    Science</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0002-3860-1376</contrib-id>
                    <name>
                        <surname>Brandt</surname>
                        <given-names>Nico</given-names>
                    </name>
                    <email>nico.brandt@kit.edu</email>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0002-8093-6356</contrib-id>
                    <name>
                        <surname>Griem</surname>
                        <given-names>Lars</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0001-8208-4356</contrib-id>
                    <name>
                        <surname>Herrmann</surname>
                        <given-names>Christoph</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0001-6821-7263</contrib-id>
                    <name>
                        <surname>Schoof</surname>
                        <given-names>Ephraim</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-2">2</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0001-5128-4080</contrib-id>
                    <name>
                        <surname>Tosato</surname>
                        <given-names>Giovanna</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0001-9440-138X</contrib-id>
                    <name>
                        <surname>Zhao</surname>
                        <given-names>Yinghan</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0002-4821-5719</contrib-id>
                    <name>
                        <surname>Zschumme</surname>
                        <given-names>Philipp</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid"
                        >https://orcid.org/0000-0002-9756-646X</contrib-id>
                    <name>
                        <surname>Selzer</surname>
                        <given-names>Michael</given-names>
                    </name>
                    <xref ref-type="aff" rid="aff-1">1</xref>
                    <xref ref-type="aff" rid="aff-3">3</xref>
                </contrib>
            </contrib-group>
            <aff id="aff-1"><label>1</label>Institute for Applied Materials (IAM-CMS), Karlsruhe
                Institute of Technology (KIT), Stra&#223;e am Forum 7, 76131 Karlsruhe,
                Germany</aff>
            <aff id="aff-2"><label>2</label>Helmholtz Institute Ulm for Electrochemical Energy
                Storage (HIU), Helmholtzstra&#223;e 11, 89081 Ulm, Germany</aff>
            <aff id="aff-3"><label>3</label>Institute for Digital Materials Science (IDM), Karlsruhe
                University of Applied Sciences, Moltkestra&#223;e 30, 76133 Karlsruhe, Germany</aff>
            <pub-date publication-format="electronic" date-type="pub" iso-8601-date="2021-02-10">
                <day>10</day>
                <month>02</month>
                <year>2021</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2021</year>
            </pub-date>
            <volume>20</volume>
            <elocation-id>8</elocation-id>
            <history>
                <date date-type="received" iso-8601-date="2020-10-16">
                    <day>16</day>
                    <month>10</month>
                    <year>2020</year>
                </date>
                <date date-type="accepted" iso-8601-date="2021-01-27">
                    <day>27</day>
                    <month>01</month>
                    <year>2021</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00A9; 2021 The Author(s)</copyright-statement>
                <copyright-year>2021</copyright-year>
                <license license-type="open-access"
                    xlink:href="http://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open-access article distributed under the terms of the
                        Creative Commons Attribution 4.0 International License (CC-BY 4.0), which
                        permits unrestricted use, distribution, and reproduction in any medium,
                        provided the original author and source are credited. See <uri
                            xlink:href="http://creativecommons.org/licenses/by/4.0/"
                            >http://creativecommons.org/licenses/by/4.0/</uri>.</license-p>
                </license>
            </permissions>
            <self-uri xlink:href="http://datascience.codata.org/articles/10.5334/dsj-2021-008/"/>
            <abstract>
                <p>The concepts and current developments of a research data infrastructure for
                    materials science are presented, extending and combining the features of an
                    electronic lab notebook and a repository. The objective of this infrastructure
                    is to incorporate the possibility of structured data storage and data exchange
                    with documented and reproducible data analysis and visualization, which finally
                    leads to the publication of the data. This way, researchers can be supported
                    throughout the entire research process. The software is being developed as a
                    web-based and desktop-based system, offering both a graphical user interface and
                    a programmatic interface. The focus of the development is on the integration of
                    technologies and systems based on both established and new concepts. Due
                    to the heterogeneous nature of materials science data, the current features are
                    kept mostly generic, and the structuring of the data is largely left to the
                    users. As a result, an extension of the research data infrastructure to other
                    disciplines is possible in the future. The source code of the project is
                    publicly available under a permissive Apache 2.0 license.</p>
            </abstract>
            <kwd-group>
                <kwd>research data management</kwd>
                <kwd>electronic lab notebook</kwd>
                <kwd>repository</kwd>
                <kwd>open source</kwd>
                <kwd>materials science</kwd>
            </kwd-group>
        </article-meta>
    </front>
    <body>
        <sec>
            <title>1 Introduction</title>
            <p>In engineering sciences, the handling of digital research data plays an increasingly
                important role in all fields of application (<xref ref-type="bibr" rid="B35"
                    >Sandfeld et al. 2018</xref>). This is especially the case due to the growing
                amount of data obtained from experiments and simulations (<xref ref-type="bibr"
                    rid="B23">Hey &amp; Trefethen 2003</xref>). The extraction of knowledge from
                these data is referred to as a data-driven, fourth paradigm of science, filed under
                the keyword data science (<xref ref-type="bibr" rid="B22">Hey 2009</xref>). This is
                particularly true in materials science, as the research and understanding of new
                materials are becoming more and more complex (<xref ref-type="bibr" rid="B24">Hill
                    et al. 2016</xref>). Without suitable analysis methods, the ever-growing amount
                of data will no longer be manageable. In order to be able to perform appropriate
                data analyses smoothly, the structured storage of research data and associated
                metadata is an important aspect. Specifically, a uniform research data management is
                needed, which is made possible by appropriate infrastructures such as research data
                repositories. In addition to uniform data storage, such systems can help to overcome
                inter-institutional hurdles in data exchange, compare theoretical and experimental
                data and provide reproducible workflows for data analysis. Furthermore, linking the
                data with persistent identifiers enables other researchers to directly reference
                them in their work.</p>
            <p>In particular, repositories for the storage and internal or public exchange of
                research data are becoming more and more widespread. Especially the publication of
                such data, either on its own or as a supplement to a text publication, is
                increasingly encouraged or sometimes even required (<xref ref-type="bibr" rid="B28"
                    >Naughton &amp; Kernohan 2016</xref>). In order to find a suitable repository,
                services such as re3data (<xref ref-type="bibr" rid="B31">Pampel et al. 2013</xref>)
                or FAIRsharing (<xref ref-type="bibr" rid="B42">The FAIRsharing Community et al.
                    2019</xref>) are available. These services also make it possible to find
                subject-specific repositories for materials science data. Two well-known examples
                are the Materials Project (<xref ref-type="bibr" rid="B25">Jain et al. 2013</xref>)
                and the NOMAD Repository (<xref ref-type="bibr" rid="B12">Draxl &amp; Scheffler
                    2018</xref>). Indexed repositories are usually hosted centrally or
                institutionally, and are mostly used for the publication of data. However, some of
                the underlying systems can also be installed by the user, such as for internal use
                within individual research groups. Additionally, this allows full control over
                stored data as well as internal data exchanges, if this function is not already part
                of the repository. In this respect, open-source systems are particularly important,
                as this means independence from vendors and opens up the possibility of modifying
                the existing functionality or adding additional features, sometimes via built-in
                plug-in systems. Examples of such systems are CKAN (<xref ref-type="bibr" rid="B9"
                    >CKAN Association 2014</xref>), Dataverse (<xref ref-type="bibr" rid="B26">King
                    2007</xref>), DSpace (<xref ref-type="bibr" rid="B40">Smith et al. 2003</xref>)
                or Invenio (<xref ref-type="bibr" rid="B16">CERN 2016</xref>), where the latter is
                the basis of Zenodo (<xref ref-type="bibr" rid="B17">CERN &amp; OpenAIRE
                2013</xref>). The listed repositories are all generic and represent only a selection
                of the existing open-source systems (<xref ref-type="bibr" rid="B2">Amorim et al.
                    2017</xref>).</p>
            <p>A second type of system, in addition to the repositories, which is also increasingly
                used in experimentally oriented research areas, is the electronic lab notebook
                (ELN) (<xref ref-type="bibr" rid="B34">Rubacha, Rattan &amp; Hosselet 2011</xref>).
                Nowadays, the functionality of ELNs goes far beyond the simple replacement of
                paper-based lab notebooks, and can also include aspects such as data analysis, as
                seen, for example, in Galaxy (<xref ref-type="bibr" rid="B1">Afgan et al.
                    2018</xref>) or Jupyter Notebooks (<xref ref-type="bibr" rid="B27">Kluyver et
                    al. 2016</xref>). Both systems focus primarily on providing accessible and
                reproducible computational research. Specifically, the boundary between unstructured
                and structured data is more and more blurred, the latter being traditionally only
                found in laboratory information management systems (LIMS) (<xref ref-type="bibr"
                    rid="B3">Bird, Willoughby &amp; Frey 2013</xref>; <xref ref-type="bibr"
                    rid="B14">Elliott 2009</xref>; <xref ref-type="bibr" rid="B41">Taylor
                    2006</xref>). Most existing ELNs are domain-specific and limited to research
                disciplines such as biology or chemistry (<xref ref-type="bibr" rid="B41">Taylor
                    2006</xref>). According to current knowledge, a system specifically tailored to
                materials science does not exist. For ELNs, there are also open-source systems such
                as eLabFTW (<xref ref-type="bibr" rid="B7">CARPi, Minges &amp; Piel 2017</xref>),
                SciNote (<xref ref-type="bibr" rid="B39">SciNote LLC 2015</xref>) or Chemotion
                    (<xref ref-type="bibr" rid="B45">Tremouilhac et al. 2017</xref>). Compared to
                the repositories, however, the selection of ELNs is smaller. Furthermore, only the
                first two mentioned systems are generic.</p>
            <p>Thus, generic research data systems and software are available for both ELNs and
                repositories, which, in principle, could also be used in materials science. The
                listed open-source solutions are of particular relevance, as they can be adapted to
                different needs and are generally suitable for use in a custom installation within
                single research groups. However, both aspects can be a considerable hurdle,
                especially for smaller groups. Due to a lack of resources, structured research data
                management and the provision of data for subsequent use are
                therefore particularly difficult for such groups (<xref ref-type="bibr" rid="B30"
                    >Heidorn 2008</xref>). What is finally missing is a system that can be
                deployed and used both centrally and decentrally, as well as internally and
                publicly, without major obstacles. The system should support researchers throughout
                the entire research process, starting with the generation and extraction of raw
                data, up to the structured storage, exchange and analysis of the data, resulting in
                the final publication of the corresponding results. In this way, the features of the
                ELN and the repository are combined, creating a virtual research environment (<xref
                    ref-type="bibr" rid="B8">Carusi &amp; Reimer 2010</xref>) that accelerates the
                generation of innovations by facilitating the collaboration between researchers. In
                an interdisciplinary field like materials science, there is a special need to model
                the very heterogeneous workflows of the researchers (<xref ref-type="bibr" rid="B24"
                    >Hill et al. 2016</xref>).</p>
            <p>For this purpose, the research data infrastructure Kadi4Mat (Karlsruhe Data
                Infrastructure for Materials Sciences) is being developed at the Institute for
                Applied Materials (IAM-CMS) of the Karlsruhe Institute of Technology (KIT). The
                current logo of the project is shown in <bold><italic><xref ref-type="fig" rid="F1"
                            >Figure 1</xref></italic></bold>. The aim of the software is to combine
                the possibility of structured data storage with documented and reproducible
                workflows for data analysis and visualization tasks, incorporating new concepts with
                established technologies and existing solutions. In the development of the software,
                the FAIR principles (<xref ref-type="bibr" rid="B47">Wilkinson et al. 2016</xref>)
                for scientific data management are taken into account. Instances of the data
                infrastructure have already been deployed and show how structured data storage and
                data exchange are made possible (<xref ref-type="bibr" rid="B4">Brandt 2020</xref>).
                Furthermore, the source code of the project is publicly available under a permissive
                Apache 2.0 license (<xref ref-type="bibr" rid="B5">Brandt et al. 2020</xref>).</p>
            <fig id="F1">
                <label>Figure 1</label>
                <caption>
                    <p>Logo of Kadi4Mat.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g1.png"
                />
            </fig>
        </sec>
        <sec>
            <title>2 Concepts</title>
            <p>Kadi4Mat is logically divided into the two components ELN and repository, which have
                access to various tools and technical infrastructures. The components can be used by
                web- and desktop-based applications, via uniform interfaces. Both a graphical and a
                programmatic interface are provided, using machine-readable formats and various
                exchange protocols. In <bold><italic><xref ref-type="fig" rid="F2">Figure
                        2</xref></italic></bold>, a conceptual overview of the infrastructure of
                Kadi4Mat is presented.</p>
            <fig id="F2">
                <label>Figure 2</label>
                <caption>
                    <p>Conceptual overview of the infrastructure of Kadi4Mat. The system is
                        logically divided into the two components ELN and repository, which have
                        access to various data handling tools and technical infrastructures. The two
                        components can be used both graphically and programmatically via uniform
                        interfaces.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g2.png"
                />
            </fig>
            <sec>
                <title>2.1 Electronic Lab Notebook</title>
                <p>In the ELN component, the so-called <italic>workflows</italic> are of particular
                    importance. A workflow is a generic concept that describes a well-defined
                    sequence of sequential or parallel steps, which are processed as automatically
                    as possible. This can include the execution of an analysis tool or the control
                    and data retrieval of an experimental device. To accommodate such heterogeneity,
                    the concrete steps must be implemented as flexibly as possible, since they are
                    highly user- and application-specific. In <bold><italic><xref ref-type="fig"
                                rid="F2">Figure 2</xref></italic></bold>, the types of tools shown
                    in the second layer are used as part of the workflows, so as to implement the
                    actual functionality of the various steps. These can be roughly divided into
                    analysis, visualization, transformation and transportation tasks. In order to
                    keep the application of these tools as generic as possible, a combination of
                    provided and user-defined tools is accessed. From a user&#8217;s perspective, it
                    must be possible to provide such tools in an easy manner, while the execution of
                    each tool must take place in a secure and functional environment. This is
                    especially true for existing tools, e.g. a simple MATLAB (<xref ref-type="bibr"
                        rid="B43">The MathWorks, Inc. 2021</xref>) script, which require certain
                    dependencies to be executed and must be equipped with a suitable interface to be
                    used within a workflow. Depending on their functionality, the tools must in turn
                    access various technical infrastructures. In addition to the use of the
                    repository and computing infrastructure, direct access to devices is also
                    important for more complex data analyses. The automation of a typical workflow
                    of experimenters is only fully possible if data and metadata, created by
                    devices, can be captured. However, such an integration is not trivial, due to a
                    heterogeneous device landscape and proprietary data formats and interfaces
                        (<xref ref-type="bibr" rid="B21">Hawker 2007</xref>; <xref ref-type="bibr"
                        rid="B33">Potthoff et al. 2019</xref>). In Kadi4Mat, it should also be
                    possible to use individual tools separately, where appropriate, i.e. outside a
                    workflow. For example, a visualization tool for a custom data format may be used
                    to generate a preview of a datum that can be directly displayed in a web
                    browser, when using the web-based interface.</p>
                <p>In <bold><italic><xref ref-type="fig" rid="F3">Figure 3</xref></italic></bold>,
                    the current concept for the integration of the workflows in Kadi4Mat is shown.
                    Different steps of a workflow can be defined with a graphical node editor.
                    Either a web-based or a desktop-based version of such an editor can be used, the
                    latter running as an ordinary application on a local workstation. With the help
                    of such an editor, the different steps or tools to be executed are defined,
                    linked and, most importantly, parameterized. The execution of a workflow can be
                    started via an external component called <italic>process manager</italic>. This
                    component in turn manages several <italic>process engines</italic>, which take
                    care of executing the workflows. The process engines potentially differ in their
                    implementation and functionality. A simple process engine, for example, could be
                    limited to a sequential execution order of the different tasks, while another
                    one could execute independent tasks in parallel. All engines process the
                    required steps based on the information stored in the workflow. With appropriate
                    transport tools, the data and metadata required for each step, as well as the
                    resulting output, can be exported or imported from Kadi4Mat, using the existing
                    interfaces of the research data infrastructure. With similar tools, the use of
                    other external data sources becomes possible, and with it the possibility to
                    handle large amounts of data via suitable exchange protocols. The use of locally
                    stored data is also possible when running a workflow on a local workstation.</p>
                <fig id="F3">
                    <label>Figure 3</label>
                    <caption>
                        <p>Conceptual overview of the workflow architecture. Each workflow is
                            defined using a graphical editor that is either directly integrated into
                            the web-based interface of Kadi4Mat or run locally as a desktop
                            application. The process manager provides an interface for executing
                            workflows and communicates on behalf of the user with multiple process
                            engines, to which the actual execution of workflows is delegated. The
                            engines are responsible for the actual processing of the different
                            steps, based on the information defined in a workflow. Data and metadata
                            can either be stored externally or locally.</p>
                    </caption>
                    <graphic xmlns:xlink="http://www.w3.org/1999/xlink"
                        xlink:href="dsj-20-1282-g3.png"/>
                </fig>
                <p>Since the reproducibility of the performed steps is a key objective of the
                    workflows, all meaningful information and metadata can be logged along the way.
                    The logging needs to be flexible, in order to accommodate different individual
                    or organizational needs, and therefore is also part of the workflow itself.
                    Workflows can also be shared with other users, for example via Kadi4Mat. Manual
                    steps may require interaction during the execution of a workflow, for which the
                    system must prompt the user. In summary, the focus of the ELN component thus
                    points in a different direction than in classic ELNs, with the emphasis on the
                    automation of the steps performed. This aspect in particular is similar to
                    systems such as Galaxy (<xref ref-type="bibr" rid="B1">Afgan et al.
                    2018</xref>), which focuses on computational biology, or Taverna (<xref
                        ref-type="bibr" rid="B48">Wolstencroft et al. 2013</xref>), a dedicated
                    workflow management system. Nevertheless, some typical features of classic ELNs
                    are also considered in the ELN component, such as the inclusion of handwritten
                    notes.</p>
            </sec>
            <sec>
                <title>2.2 Repository</title>
                <p>In the repository component, data management is regarded as the central element,
                    especially the structured data storage and exchange. An important aspect is the
                    enrichment of data with corresponding descriptive metadata, which is required
                    for its description, analysis or search. Many repositories, especially those
                    focused on publishing research data, use the metadata schema provided by
                    DataCite (<xref ref-type="bibr" rid="B10">DataCite Metadata Working Group
                        2019</xref>), and are either directly or heavily based on it. This schema is
                    widely supported and enables the direct publication of data, via the
                    corresponding DataCite service. At the same time, it is limited in its
                    descriptive power for use cases that go beyond data publications. There are
                    comparatively few subject-specific schemas available for engineering and
                    materials science. Two examples are EngMeta (<xref ref-type="bibr" rid="B36"
                        >Schembera &amp; Iglezakis 2020</xref>) and NOMAD Meta Info (<xref
                        ref-type="bibr" rid="B19">Ghiringhelli et al. 2017</xref>). The first schema
                    is created a priori and aims to provide a generic description of computer-aided
                    engineering data, while the second schema is created a posteriori, using
                    existing computing inputs and outputs from the database of the NOMAD
                    repository.</p>
                <p>The second approach is also pursued in a similar way in Kadi4Mat. Instead of a
                    fixed metadata schema, the concrete structure is largely determined by the users
                    themselves, and thus is oriented towards their specific needs. To aid with
                    establishing common metadata vocabularies, a mechanism to create templates is
                    provided. Templates can impose certain restrictions and validations on certain
                    metadata. They are user-defined and can be shared within workgroups or projects,
                    facilitating the establishment of metadata standards. Nevertheless, individual,
                    generic metadata fields, such as a title or description of a data set, can be
                    static. For different use cases such as data analysis, publishing or the
                    interoperability with other systems, additional conversions must be provided.
                    This is not only necessary because of differing data formats, but also to map
                    vocabularies of different schemas accordingly. Such converted metadata can
                    either represent a subset of existing schemas or require additional fields, such
                    as a license for the re-use of published data. In the long run, the objective in
                    Kadi4Mat is to offer well-defined structures and semantics, by making use of
                    ontologies. In the field of materials science, there are ongoing developments in
                    this respect, such as the European Materials Modelling Ontology (<xref
                        ref-type="bibr" rid="B15">EMMC 2019</xref>). However, a bottom-up procedure
                    is considered a more flexible solution, with the objective of generating an
                    ontology from existing metadata and relationships between different data sets.
                    Such a two-pronged approach aims to be functional in the short term, while still
                    staying extensible in the long term (<xref ref-type="bibr" rid="B20">Greenberg
                        et al. 2009</xref>), although it heavily depends on how users manage their
                    data and metadata with the options available.</p>
                <p>In addition to the metadata, the actual data must be managed as well. Here, one
                    can distinguish between data managed directly by Kadi4Mat and linked data. In
                    the simplest form, the former resides on a file system accessible by the
                    repository, which means full control over the data. This requires a copy of each
                    datum to be made available in Kadi4Mat, which makes it less suitable for very
                    large amounts of data. The same applies to data analyses that are to be carried
                    out on external computing infrastructures and must access the data for this
                    purpose. Linked data, on the other hand, can be located on external data storage
                    devices, e.g. high-performance computing infrastructures. This also makes it
                    possible to integrate existing infrastructures and repositories. In these cases,
                    Kadi4Mat can simply offer a view on top of such infrastructures or a more direct
                    integration, depending on the concrete system in question.</p>
                <p>A further point to be addressed within the repository is the publication of data
                    and metadata, including templates and workflows, which require persistent
                    identifiers to be referenceable. Many existing repositories and systems are
                    already specialized in exactly this use case and offer infrastructures for the
                    long-term archiving of large amounts of data. Thus, an integration of suitable
                    external systems is to be considered for this task in particular. From
                    Kadi4Mat&#8217;s point of view, only certain basic requirements have to be
                    ensured in order to enable the publishing of data. These include the assignment
                    of a unique identifier within the system, the provision of metadata and
                    licenses, necessary for a publication, and a basic form of user-guided quality
                    control. The repository component thus also goes in a different direction than
                    classic repositories. In a typical scientific workflow, it is primarily focused
                    on all steps that take place between the initial data acquisition and the
                    publishing of data. The component is therefore best described as a
                        <italic>community repository</italic> that manages <italic>warm</italic>
                    data, i.e. unpublished data that needs further analysis, and enables data
                    exchange within specific communities, for example within a research group or
                    project.</p>
            </sec>
        </sec>
        <sec>
            <title>3 Implementation</title>
            <p>Kadi4Mat is built as a web-based application that employs a classic client-server
                architecture. A graphical front end is provided to be used with a normal web browser
                as a client, while the server is responsible for the handling of the back end and
                the integration of external systems. A high-level overview of the implementation is
                shown in <bold><italic><xref ref-type="fig" rid="F4">Figure
                4</xref></italic></bold>. The front end is based on the classic web technologies
                JavaScript, HTML and CSS. In particular, the client-side JavaScript web framework
                Vue.js (<xref ref-type="bibr" rid="B46">Vue Core Development Team 2014</xref>) is
                used. The framework is especially suitable for the creation of single-page web
                applications (SPA), but can also be used for individual sections of more classic
                applications, to incrementally add complex and dynamic user interface components to
                certain pages. Vue.js is mainly used for the latter, the benefit being a clear
                separation between the data and the presentation layer, as well as the easier re-use
                of user interface components. This aspect is combined with server-side rendering.
                Due to the technologies and standards employed, the use of the front end is
                currently limited to recent versions of modern web browsers such as Firefox, Chrome
                or Edge.</p>
            <fig id="F4">
                <label>Figure 4</label>
                <caption>
                    <p>Overview of the implementation of Kadi4Mat, separated into front end and back
                        end. The front end uses classic web technologies and is usually operated via
                        a web browser. In the back end, the functionality is split into the web and
                        the core component. The former takes care of the external interfaces, while
                        the latter contains most of the core functionality and handles the
                        interfaces of other systems. A plugin component is also shown, which can be
                        used to customize or extend the functionality of the system.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g4.png"
                />
            </fig>
            <p>In the back end, the framework Flask (<xref ref-type="bibr" rid="B44">The Pallets
                    Projects 2015</xref>) is used for the web component. The framework is
                implemented in Python and is compatible with the common web server gateway interface
                (WSGI), which specifies an interface between web servers and Python applications. As
                a so-called microframework, the functionality of Flask itself is limited to the
                basic features. This means that most of the functionality that is unrelated to the
                web component has to be added by custom code or suitable libraries. At the same
                time, more freedom is offered in the concrete choice of technologies. This is in
                direct contrast to web frameworks such as Django (<xref ref-type="bibr" rid="B11"
                    >DSF 2005</xref>), which already provide a lot of functionality out of the box.
                The web component itself is responsible for handling client requests for specific
                endpoints and assigning them to the appropriate Python functions. Currently, either
                HTML or JSON is returned, depending on the endpoint. The latter is used as part of
                an HTTP API, to enable an internal and external programmatic data exchange. This API
                is based on the representational state transfer (REST) paradigm (<xref
                    ref-type="bibr" rid="B18">Fielding &amp; Taylor 2000</xref>). Support for other
                exchange formats could also be relevant in the future, particularly for implementing
                certain exchange formats for interoperability, such as OAI-PMH (<xref
                    ref-type="bibr" rid="B29">OAI 2015</xref>). Especially for handling larger
                amounts of data, other exchange protocols besides HTTP are considered.</p>
            <p>A large part of the application consists of the core functionality, which is divided
                into different modules, as shown in <bold><italic><xref ref-type="fig" rid="F4"
                            >Figure 4</xref></italic></bold>. This structure is mainly of an
                organizational nature. A microservice architecture is currently not implemented.
                Modules that access external components are particularly noteworthy, which is an
                aspect that will also be increasingly important in the future. External components
                can either run on the same hardware as Kadi4Mat itself or on separate systems
                available via a network interface. For the storage of metadata, the persistence
                module makes use of the relational database management system PostgreSQL (<xref
                    ref-type="bibr" rid="B32">PostgreSQL Global Development Group 1996</xref>),
                while the regular file system stores the actual data. Additionally, the software
                Elasticsearch (<xref ref-type="bibr" rid="B13">Elastic NV 2010</xref>) is used to
                index all the metadata that needs to be efficiently searchable. The aforementioned
                process manager (<xref ref-type="bibr" rid="B50">Zschumme 2021b</xref>), which is
                currently implemented as a command line application, manages the execution of
                workflows by delegating each execution task to an available process engine (<xref
                    ref-type="bibr" rid="B49">Zschumme 2021a</xref>). While the current
                implementation of the process engine primarily uses the local file system of the
                machine on which it is running, users can add steps to synchronize data with the
                repository to their workflow at will. To increase performance with multiple parallel
                requests for workflow execution, the requests can be distributed to process engines
                running on additional servers. By wrapping the process manager with a simple HTTP
                API, for example, its interface can easily be used over a network. A message broker
                is used to decouple longer running or periodically executed background tasks from
                the rest of the application, by delegating them to one or more background worker
                processes. Apart from using locally managed user accounts or an LDAP system for
                authentication, Shibboleth (<xref ref-type="bibr" rid="B6">Cantor &amp; Scavo
                    2005</xref>) can be used as well. From a technical point of view, Shibboleth is
                not a single system, but the interaction of several components, which together
                enable a distributed authentication procedure. Depending on the type of
                authentication, user attributes or group affiliations can also be used for
                authorization purposes in the future.</p>
            <p>Another component shown in <bold><italic><xref ref-type="fig" rid="F4">Figure
                            4</xref></italic></bold> consists of the plugins. These can be used to customize
                or extend the basic functionality of certain procedures or actions, without having
                to modify or know the corresponding implementation in detail. Unlike the tools in a
                workflow, plugins make use of predefined hooks to add their custom functionality.
                While such a plugin has to be installed centrally by the system administrator for
                all users of a Kadi4Mat instance, the possibility of making use of individual
                plugins on the user level is also being evaluated.</p>
        </sec>
        <sec>
            <title>4 Results</title>
            <p>The current functionalities of Kadi4Mat can either be utilized via the graphical user
                interface, with a browser, or via the HTTP API, with a suitable client. On top of
                the API, a Python library is developed, which makes it especially easy to interact
                with the different functionalities (<xref ref-type="bibr" rid="B38">Schoof &amp;
                    Brandt 2020</xref>). Besides being usable in Python code, the library offers a
                command line interface, enabling the integration with other programming or scripting
                languages.</p>
            <p>In the following, the most important features of Kadi4Mat are explained, based on its
                graphical user interface. The focus of the features implemented so far is on the
                repository component, the topics of structured data management and data exchange in
                particular, as well as on the workflows, which are a central part of the ELN&#8217;s
                functionality. After logging in to Kadi4Mat, it is possible to create different
                types of resources. The most important resources are the so-called
                    <italic>records</italic>, which can link arbitrary data with descriptive
                metadata and serve as basic components that can be used in workflows and future data
                publications. In principle, a record can be used for all kinds of data, including
                data from simulations or experiments, and it can be linked to other records of
                related data sets, e.g. to the descriptions of the software and hardware devices
                used. The metadata of a record includes both basic metadata, such as title or
                description, and domain-specific metadata, which can be specified generically, in
                the form of key/value pairs. The latter can be defined using a special editor, as
                shown in <bold><italic><xref ref-type="fig" rid="F5">Figure
                5</xref></italic></bold>. With the help of such metadata, a description of subject-
                and application-specific records becomes possible. This is particularly relevant in
                an interdisciplinary research field such as materials science, where using a fixed
                schema would be impracticable, due to the heterogeneity of the data formats and the
                corresponding metadata. The value of each metadata entry can be of different types,
                such as simple character strings or numeric types like integers and floating point
                numbers. Numeric values can also be provided with an arbitrary unit. Furthermore,
                nested types can be used to represent metadata structures of almost any complexity,
                for example in the form of lists. The input of such structures can be simplified by
                templates, which are specified in advance and can be combined as desired. While
                templates currently offer the same possibilities as the actual metadata, it is
                planned to add further validation functionalities, such as the specification of a
                selection of valid values for certain metadata keys. Wherever possible,
                automatically recorded metadata is also available in each record, such as the
                creator of the record or the creation date. The actual data of the record can be
                uploaded by the users and are currently stored on a file system, accessible by
                Kadi4Mat. It is possible to upload any number of files for each record. This can be
                helpful when dealing with a series of several hundred images of a simulated
                microstructure, for example, which all share the same metadata.</p>
            <fig id="F5">
                <label>Figure 5</label>
                <caption>
                    <p>Screenshot of the generic metadata editor, showing the different types of
                        metadata entries currently possible. The last two examples of type
                            <italic>dictionary</italic> and <italic>list</italic> contain nested
                        metadata entries. In the upper right corner, a menu is displayed that allows
                        performing various actions, one of which switches to a tree-based overview
                        of the metadata. The ability to select metadata templates is shown in the
                        lower right corner.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g5.png"
                />
            </fig>
            <p>The created record can be viewed on an overview page that displays all metadata and
                linked files. For some common file formats, such as image files, PDFs, archives or
                textual data, a preview is provided that is directly integrated into the web browser.
                Furthermore, the access rights of the record are displayed on its overview page.
                Currently, two levels of visibility can be set when creating a record: public and
                private visibility. While public records can be viewed by every logged-in user, i.e.
                read rights are granted implicitly to each user, private records can initially only
                be viewed by their creator. Only the creator of a record can perform further
                actions, such as editing the metadata or deleting the entire record.
                            <bold><italic><xref ref-type="fig" rid="F6">Figure
                    6</xref></italic></bold> shows the overview page of a record, including its
                metadata and the menu to perform the previously mentioned actions. In order to grant
                different access rights to other users, even within private records, different roles
                can be defined for any user in a separate view. Currently, the roles are static,
                which means that they can be selected from a predefined list and are each linked to
                the corresponding fine-grained permissions. Because of these permissions, it becomes
                possible to introduce custom roles or to link certain actions to different user
                attributes. In addition to roles for individual users, roles can
                also be defined for <italic>groups</italic>. These are simple groupings of several
                users which, similar to records, can be created and managed by the users themselves.
                The same roles that can be defined for individual users can be assigned to groups as
                well. Each member of the group is granted the corresponding access rights
                automatically. Finally, the overview page of a record also shows the resources
                linked to it. This refers in particular to the so-called
                    <italic>collections</italic>. Collections represent simple, logical groupings of
                multiple records and can thus contribute to a better organization of resources. In
                terms of an ontology, collections can be regarded as classes, while records inside a
                collection represent concrete instances of such a class. Like records and groups,
                collections can be created and managed by users. Records can also be linked to other
                records. Each record link represents a separate resource, which in turn can contain
                certain metadata. The ability to specify generic metadata and such resource links
                already enables a basic ontology-like structure. This structure can be further
                improved in the future, e.g. by using different types of links, with varying
                semantics, and by allowing collections to be nested.</p>
            <fig id="F6">
                <label>Figure 6</label>
                <caption>
                    <p>Screenshot of a record overview page. The basic metadata is shown, followed
                        by the generic metadata entries (shown as <italic>extra metadata</italic>).
                        The menu on the top allows various actions to be performed on the current
                        record. The tabs below the menu are used to switch to other views that
                        display the files and other resources associated with the current record, as
                        well as access permissions and a history of metadata revisions.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g6.png"
                />
            </fig>
            <p>To be able to find different resources efficiently, especially records, a search
                function is included in Kadi4Mat. This allows searching in the basic metadata of
                resources and in the generic metadata of records via keywords or full text search.
                The values of nested generic metadata entries are flattened before they are indexed
                in Elasticsearch. This way, a common search mapping can be defined for all kinds of
                generic metadata. The search results can be sorted and filtered in various ways, for
                example, by using different user-defined tags or data formats, in the case of
                records. <bold><italic><xref ref-type="fig" rid="F7">Figure 7</xref></italic></bold>
                shows an example search of records, with the corresponding results.</p>
            <fig id="F7">
                <label>Figure 7</label>
                <caption>
                    <p>Screenshot of the search functionality of records with the corresponding
                        search results. In addition to providing a simple query for searching the
                        basic metadata of a record, the generic metadata can also be searched by
                        specifying desired keys, types or values. The searchable types are derived
                        from the actual types of the generic metadata entries, e.g. integers and
                        floating point numbers are grouped together as numeric type. Various other
                        options are offered for filtering and sorting the search results.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g7.png"
                />
            </fig>
            <p>While the execution of workflows, via the web interfaces, and the ability to add
                user-defined tools are still under development, it is possible to define a workflow
                using a graphical node editor, running in the web browser. <bold><italic><xref
                            ref-type="fig" rid="F8">Figure 8</xref></italic></bold> shows a simple
                example workflow created with this editor. A selection of predefined nodes can be
                combined and parameterized, while the resulting workflow can be downloaded. A custom
                JSON-based format is currently used to store the representation of a workflow. This
                format contains all the information for the node editor to correctly display the
                workflow and to derive a functional workflow representation for execution. The
                downloaded workflow file can be executed directly on a local workstation by using
                the command line interface of the process manager. All tools to actually run such a
                workflow need to be installed beforehand. A selection of tools is provided for
                various tasks (<xref ref-type="bibr" rid="B51">Zschumme et al. 2020</xref>),
                including connecting to a Kadi4Mat instance by using a suitable wrapper on top of
                the aforementioned API library. Several common use cases have already been
                implemented, including the task of extracting metadata from Excel spreadsheets,
                often used to replace an actual ELN system, and importing it into Kadi4Mat. An
                overview of such a workflow is shown in <bold><italic><xref ref-type="fig" rid="F9"
                            >Figure 9</xref></italic></bold>. Developments are also underway for
                data science applications, especially in the field of machine learning. The
                combination of the ELN and the repository fits particularly well with the
                requirements of such applications, which typically require lots of high-quality
                input data to function well.</p>
            <fig id="F8">
                <label>Figure 8</label>
                <caption>
                    <p>Screenshot of a workflow created with the web-based node editor. Several
                            <italic>String</italic> input nodes are shown, as well as a special node
                        that prompts the user to enter a file (<italic>UserInput: File</italic>).
                        The two tools <italic>mkdir</italic> and <italic>ImageJMacro</italic> are
                        used to create a new directory and to execute an ImageJ (<xref
                            ref-type="bibr" rid="B37">Schindelin et al. 2015</xref>) macro file,
                        respectively. The latter uses the input file the user was asked for. Except
                        for the input nodes, all nodes are connected via an explicit dependency.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g8.png"
                />
            </fig>
            <fig id="F9">
                <label>Figure 9</label>
                <caption>
                    <p>Overview of an exemplary workflow using Kadi4Mat. The starting point is raw
                        data and corresponding metadata stored in an Excel spreadsheet. The tools
                        used in this workflow are divided into tools for data handling and tools for
                        data transport, the latter referring to the Kadi4Mat integration. In a first
                        conversion step, the metadata are transformed into a format readable by the
                        API of Kadi4Mat and linked to the raw data by creating a new record. The raw
                        data is further analysed using the metadata stored in Kadi4Mat. Finally, the
                        result of the analysis is plotted and both data sets are uploaded to
                        Kadi4Mat as records. All records can be linked to each other in a further
                        step, either as part of the workflow or separately.</p>
                </caption>
                <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="dsj-20-1282-g9.png"
                />
            </fig>
        </sec>
        <sec>
            <title>5 Conclusion</title>
            <p>The development and current functionality of the research data infrastructure
                Kadi4Mat are presented. The objective of this infrastructure is to combine the
                features of an ELN and a repository in such a way that researchers can be supported
                throughout the whole research process. The ongoing development aims at covering the
                heterogeneous use cases of materials science disciplines. For this purpose, flexible
                metadata schemas, workflows and tools are especially important, as is the use of
                custom installations and instances. The basic functionality of the repository
                component is largely given by the features already implemented and can be used with
                a graphical as well as a programmatic interface. This includes, above all,
                uploading, managing and exchanging data as well as the associated metadata. The
                latter can be defined with a flexible metadata editor to accommodate the needs of
                different users and workgroups. A search functionality enables the efficient
                retrieval of the data. The essential infrastructure for workflows is implemented as
                a central part of the ELN component. Simple workflows can be defined with an initial
                version of the web-based node editor and executed locally using provided tools and
                the process manager&#8217;s command line interface. Both main components are
                improved continuously. Various other features that have not yet been mentioned as
                part of the concept are planned or are already in the conception stage. These
                include the optional connection of several Kadi4Mat instances, a more direct,
                low-level access to data and the integration of an app store, for the central
                administration of tools and plugins.</p>
            <p>The development of Kadi4Mat largely follows a bottom-up approach. Instead of
                developing concepts in advance, to cover as many use cases as possible, a basic
                technical infrastructure is established first. On this basis, further steps are
                evaluated in exchange with interested users and by implementing best practice
                examples. Due to the heterogeneous nature of materials science, most features are
                kept very generic. The concrete structuring of the data storage, the metadata and
                the workflows is largely left to the users. As a positive side effect, an extension
                of the research data infrastructure to other disciplines is possible in the
                future.</p>
        </sec>
    </body>
    <back>
        <ack>
            <title>Acknowledgements</title>
            <p>This work is supported by the Federal Ministry of Education and Research (BMBF) in
                the projects FestBatt (project number 03XP0174E) and as part of the Excellence
                Strategy of the German Federal and State Governments, by the German Research
                Foundation (DFG) in the projects POLiS (project number 390874152) and SuLMaSS
                (project number 391128822) and by the Ministry of Science, Research and Art
                Baden-W&#252;rttemberg in the project MoMaF &#8211; Science Data Center, with funds
                from the state digitization strategy digital@bw (project number 57). The authors are
                also grateful for the editorial support of Leon Geisen.</p>
        </ack>
        <sec>
            <title>Competing Interests</title>
            <p>The authors have no competing interests to declare.</p>
        </sec>
        <ref-list>
            <ref id="B1">
                <label>1</label>
                <mixed-citation publication-type="journal"><string-name><surname>Afgan</surname>,
                            <given-names>E</given-names></string-name>, et al. <month>July</month>
                    <day>2</day>, <year>2018</year>. <article-title>The Galaxy Platform for
                        Accessible, Reproducible and Collaborative Biomedical Analyses: 2018
                        Update</article-title>. <source>Nucleic Acids Research</source>,
                        <volume>46</volume>(<issue>W1</issue>):
                        <fpage>W537</fpage>&#8211;<lpage>W544</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1093/nar/gky379</pub-id></mixed-citation>
            </ref>
            <ref id="B2">
                <label>2</label>
                <mixed-citation publication-type="journal"><string-name><surname>Amorim</surname>,
                            <given-names>RC</given-names></string-name>, et al. <month>Nov</month>.
                        <year>2017</year>. <article-title>A Comparison of Research Data Management
                        Platforms: Architecture, Flexible Metadata and
                        Interoperability</article-title>. <source>Universal Access in the
                        Information Society</source>, <volume>16</volume>(<issue>4</issue>):
                        <fpage>851</fpage>&#8211;<lpage>862</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1007/s10209-016-0475-y</pub-id></mixed-citation>
            </ref>
            <ref id="B3">
                <label>3</label>
                <mixed-citation publication-type="journal"><string-name><surname>Bird</surname>,
                            <given-names>CL</given-names></string-name>,
                            <string-name><surname>Willoughby</surname>,
                        <given-names>C</given-names></string-name> and
                            <string-name><surname>Frey</surname>,
                        <given-names>JG</given-names></string-name>. <year>2013</year>.
                        <article-title>Laboratory Notebooks in the Digital Era: The Role of ELNs in
                        Record Keeping for Chemistry and Other Sciences</article-title>.
                        <source>Chemical Society Reviews</source>,
                        <volume>42</volume>(<issue>20</issue>): <fpage>8157</fpage>. DOI: <pub-id
                        pub-id-type="doi">10.1039/c3cs60122f</pub-id></mixed-citation>
            </ref>
            <ref id="B4">
                <label>4</label>
                <mixed-citation publication-type="webpage"><string-name><surname>Brandt</surname>,
                            <given-names>N</given-names></string-name>. <year>2020</year>.
                        <source>Kadi4Mat &#8211; Karlsruhe Data Infrastructure for Materials
                        Science</source>. URL: <uri>https://kadi.iam-cms.kit.edu</uri> (visited on
                    Sept. 30, 2020).</mixed-citation>
            </ref>
            <ref id="B5">
                <label>5</label>
                <mixed-citation publication-type="journal"><string-name><surname>Brandt</surname>,
                            <given-names>N</given-names></string-name>, et al. <month>Oct</month>.
                        <day>16</day>, <year>2020</year>. <article-title>IAM-CMS/Kadi:
                        Kadi4Mat</article-title>. Version 0.2.0. <source>Zenodo</source>. DOI:
                        <pub-id pub-id-type="doi">10.5281/ZENODO.4088270</pub-id></mixed-citation>
            </ref>
            <ref id="B6">
                <label>6</label>
                <mixed-citation publication-type="journal"><string-name><surname>Cantor</surname>,
                            <given-names>S</given-names></string-name> and
                            <string-name><surname>Scavo</surname>,
                        <given-names>T</given-names></string-name>. <year>2005</year>.
                        <article-title>Shibboleth Architecture</article-title>. <source>Protocols
                        and Profiles</source>, <volume>10</volume>: <fpage>16</fpage>. DOI: <pub-id
                        pub-id-type="doi">10.26869/TI.66.1</pub-id></mixed-citation>
            </ref>
            <ref id="B7">
                <label>7</label>
                <mixed-citation publication-type="journal"><string-name><surname>CARPi</surname>,
                            <given-names>N</given-names></string-name>,
                            <string-name><surname>Minges</surname>,
                        <given-names>A</given-names></string-name> and
                            <string-name><surname>Piel</surname>,
                        <given-names>M</given-names></string-name>. <month>Apr</month>.
                        <day>14</day>, <year>2017</year>. <article-title>eLabFTW: An Open Source
                        Laboratory Notebook for Research Labs</article-title>. <source>The Journal
                        of Open Source Software</source>, <volume>2</volume>(<issue>12</issue>):
                        <fpage>146</fpage>. DOI: <pub-id pub-id-type="doi"
                        >10.21105/joss.00146</pub-id></mixed-citation>
            </ref>
            <ref id="B8">
                <label>8</label>
                <mixed-citation publication-type="journal"><string-name><surname>Carusi</surname>,
                            <given-names>A</given-names></string-name> and
                            <string-name><surname>Reimer</surname>,
                        <given-names>T</given-names></string-name>. <month>Jan</month>.
                        <year>2010</year>. <article-title>Virtual Research Environment Collaborative
                        Landscape Study</article-title>. <source>JISC
                    Report</source>.</mixed-citation>
            </ref>
            <ref id="B9">
                <label>9</label>
                <mixed-citation publication-type="webpage"><collab>CKAN Association</collab>.
                        <year>2014</year>. <source>CKAN &#8211; The Open Source Data Portal
                        Software</source>. URL: <uri>https://ckan.org/</uri> (visited on May 19,
                    2020).</mixed-citation>
            </ref>
            <ref id="B10">
                <label>10</label>
                <mixed-citation publication-type="journal"><collab>DataCite Metadata Working
                        Group</collab>. <year>2019</year>. <source>DataCite Metadata Schema
                        Documentation for the Publication and Citation of Research Data
                        v4.3</source>. Version 4.3. DataCite.</mixed-citation>
            </ref>
            <ref id="B11">
                <label>11</label>
                <mixed-citation publication-type="webpage"><collab>Django Software
                        Foundation</collab>. <year>2005</year>. <source>Django &#8211; The Web
                        Framework for Perfectionists with Deadlines</source>. URL:
                        <uri>https://www.djangoproject.com/</uri> (visited on May 25,
                    2020).</mixed-citation>
            </ref>
            <ref id="B12">
                <label>12</label>
                <mixed-citation publication-type="journal"><string-name><surname>Draxl</surname>,
                            <given-names>C</given-names></string-name> and
                            <string-name><surname>Scheffler</surname>,
                        <given-names>M</given-names></string-name>. <month>Sept</month>.
                        <year>2018</year>. <article-title>NOMAD: The FAIR Concept for Big
                        Data-Driven Materials Science</article-title>. <source>MRS Bulletin</source>,
                    <volume>43</volume>(<issue>9</issue>):
                        <fpage>676</fpage>&#8211;<lpage>682</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1557/mrs.2018.208</pub-id></mixed-citation>
            </ref>
            <ref id="B13">
                <label>13</label>
                <mixed-citation publication-type="webpage"><collab>Elastic NV</collab>.
                        <year>2010</year>.
                        <source>Elasticsearch &#8211; The Official Distributed Search &amp;
                        Analytics Engine</source>. URL:
                        <uri>https://www.elastic.co/elasticsearch</uri> (visited on June 2,
                    2020).</mixed-citation>
            </ref>
            <ref id="B14">
                <label>14</label>
                <mixed-citation publication-type="journal"><string-name><surname>Elliott</surname>,
                            <given-names>MH</given-names></string-name>. <year>2009</year>.
                        <article-title>Thinking beyond ELN</article-title>. <source>Scientific
                        Computing</source>, <volume>26</volume>(<issue>6</issue>):
                        <fpage>6</fpage>&#8211;<lpage>10</lpage>.</mixed-citation>
            </ref>
            <ref id="B15">
                <label>15</label>
                <mixed-citation publication-type="webpage"><collab>European Materials Modelling
                        Council</collab>. <year>2019</year>. <source>European Materials and
                        Modelling Ontology</source>. URL: <uri>https://github.com/emmo-repo</uri>
                    (visited on May 24, 2020).</mixed-citation>
            </ref>
            <ref id="B16">
                <label>16</label>
                <mixed-citation publication-type="webpage"><collab>European Organization For Nuclear
                        Research</collab>. <year>2016</year>. <source>Invenio &#8211; Open Source
                        Framework for Large-Scale Digital Repositories</source>. URL:
                        <uri>https://invenio-software.org/</uri> (visited on May 19,
                    2020).</mixed-citation>
            </ref>
            <ref id="B17">
                <label>17</label>
                <mixed-citation publication-type="journal"><collab>European Organization For Nuclear
                        Research &amp; OpenAIRE</collab>. <year>2013</year>.
                    <source>Zenodo</source>. DOI: <pub-id pub-id-type="doi"
                        >10.25495/7GXK-RD71</pub-id></mixed-citation>
            </ref>
            <ref id="B18">
                <label>18</label>
                <mixed-citation publication-type="book"><string-name><surname>Fielding</surname>,
                            <given-names>RT</given-names></string-name> and
                            <string-name><surname>Taylor</surname>,
                        <given-names>RN</given-names></string-name>. <year>2000</year>.
                        <source>Architectural Styles and the Design of Network-Based Software
                        Architectures</source>. Vol. <volume>7</volume>.
                        <publisher-loc>Irvine</publisher-loc>: <publisher-name>University of
                        California</publisher-name>.</mixed-citation>
            </ref>
            <ref id="B19">
                <label>19</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Ghiringhelli</surname>,
                            <given-names>LM</given-names></string-name>, et al. <month>Dec</month>.
                        <year>2017</year>. <article-title>Towards Efficient Data Exchange and
                        Sharing for Big-Data Driven Materials Science: Metadata and Data
                        Formats</article-title>. <source>npj Computational Materials</source>,
                    <volume>3</volume>(<issue>1</issue>). DOI: <pub-id pub-id-type="doi"
                        >10.1038/s41524-017-0048-5</pub-id></mixed-citation>
            </ref>
            <ref id="B20">
                <label>20</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Greenberg</surname>,
                        <given-names>J</given-names></string-name>, et al. <month>Nov</month>.
                        <day>30</day>, <year>2009</year>. <article-title>A Metadata Best Practice
                        for a Scientific Data Repository</article-title>. <source>Journal of Library
                        Metadata</source>, <volume>9</volume>(<issue>3&#8211;4</issue>):
                        <fpage>194</fpage>&#8211;<lpage>212</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1080/19386380903405090</pub-id></mixed-citation>
            </ref>
            <ref id="B21">
                <label>21</label>
                <mixed-citation publication-type="journal"><string-name><surname>Hawker</surname>,
                            <given-names>CD</given-names></string-name>. <month>Dec</month>.
                        <year>2007</year>. <article-title>Laboratory Automation: Total and
                        Subtotal</article-title>. <source>Clinics in Laboratory Medicine</source>,
                        <volume>27</volume>(<issue>4</issue>):
                        <fpage>749</fpage>&#8211;<lpage>770</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.cll.2007.07.010</pub-id></mixed-citation>
            </ref>
            <ref id="B22">
                <label>22</label>
                <mixed-citation publication-type="book"><string-name><surname>Hey</surname>,
                            <given-names>AJG</given-names></string-name>, (ed.). <year>2009</year>.
                        <source>The Fourth Paradigm: Data-Intensive Scientific Discovery</source>.
                        <publisher-loc>Redmond, Washington</publisher-loc>:
                        <publisher-name>Microsoft Research</publisher-name>. pp.
                    <fpage>251</fpage>.</mixed-citation>
            </ref>
            <ref id="B23">
                <label>23</label>
                <mixed-citation publication-type="book"><string-name><surname>Hey</surname>,
                            <given-names>T</given-names></string-name> and
                            <string-name><surname>Trefethen</surname>,
                        <given-names>A</given-names></string-name>. <month>Mar</month>.
                        <day>11</day>, <year>2003</year>. <chapter-title>The Data Deluge: An
                        e-Science Perspective</chapter-title>. In:
                            <string-name><surname>Berman</surname>,
                        <given-names>F</given-names></string-name>,
                            <string-name><surname>Fox</surname>,
                        <given-names>G</given-names></string-name> and
                            <string-name><surname>Hey</surname>,
                        <given-names>T</given-names></string-name> (eds.), <source>Wiley Series in
                        Communications Networking &amp; Distributed Systems</source>.
                        <publisher-loc>Chichester, UK</publisher-loc>: <publisher-name>John Wiley
                        &amp; Sons, Ltd</publisher-name>. pp.
                        <fpage>809</fpage>&#8211;<lpage>824</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1002/0470867167.ch36</pub-id></mixed-citation>
            </ref>
            <ref id="B24">
                <label>24</label>
                <mixed-citation publication-type="journal"><string-name><surname>Hill</surname>,
                            <given-names>J</given-names></string-name>, et al. <month>May</month>
                    <year>2016</year>. <article-title>Materials Science with Large-Scale Data and
                        Informatics: Unlocking New Opportunities</article-title>. <source>MRS
                        Bulletin</source>, <volume>41</volume>(<issue>5</issue>):
                        <fpage>399</fpage>&#8211;<lpage>409</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1557/mrs.2016.93</pub-id></mixed-citation>
            </ref>
            <ref id="B25">
                <label>25</label>
                <mixed-citation publication-type="journal"><string-name><surname>Jain</surname>,
                            <given-names>A</given-names></string-name>, et al. <month>July</month>
                    <year>2013</year>. <article-title>Commentary: The Materials Project: A Materials
                        Genome Approach to Accelerating Materials Innovation</article-title>.
                        <source>APL Materials</source>, <volume>1</volume>(<issue>1</issue>):
                        <fpage>011002</fpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1063/1.4812323</pub-id></mixed-citation>
            </ref>
            <ref id="B26">
                <label>26</label>
                <mixed-citation publication-type="journal"><string-name><surname>King</surname>,
                            <given-names>G</given-names></string-name>. <month>Nov</month>.
                        <year>2007</year>. <article-title>An Introduction to the Dataverse Network
                        as an Infrastructure for Data Sharing</article-title>. <source>Sociological
                        Methods &amp; Research</source>, <volume>36</volume>(<issue>2</issue>):
                        <fpage>173</fpage>&#8211;<lpage>199</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1177/0049124107306660</pub-id></mixed-citation>
            </ref>
            <ref id="B27">
                <label>27</label>
                <mixed-citation publication-type="book"><string-name><surname>Kluyver</surname>,
                            <given-names>T</given-names></string-name>, et al. <year>2016</year>.
                        <chapter-title>Jupyter Notebooks &#8211; a Publishing Format for
                        Reproducible Computational Workflows</chapter-title>. In:
                            <string-name><surname>Loizides</surname>,
                        <given-names>F</given-names></string-name> and
                            <string-name><surname>Schmidt</surname>,
                        <given-names>B</given-names></string-name> (eds.), <source>Positioning and
                        Power in Academic Publishing: Players, Agents and Agendas</source>.
                        <publisher-loc>Netherlands</publisher-loc>: <publisher-name>IOS
                        Press</publisher-name>. pp. <fpage>87</fpage>&#8211;<lpage>90</lpage>. DOI:
                        <pub-id pub-id-type="doi"
                    >10.3233/978-1-61499-649-1-87</pub-id></mixed-citation>
            </ref>
            <ref id="B28">
                <label>28</label>
                <mixed-citation publication-type="journal"><string-name><surname>Naughton</surname>,
                            <given-names>L</given-names></string-name> and
                            <string-name><surname>Kernohan</surname>,
                        <given-names>D</given-names></string-name>. <month>Mar</month>.
                    <day>7</day>, <year>2016</year>. <article-title>Making Sense of Journal Research
                        Data Policies</article-title>. <source>Insights: the UKSG journal</source>,
                        <volume>29</volume>(<issue>1</issue>):
                        <fpage>84</fpage>&#8211;<lpage>89</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1629/uksg.284</pub-id></mixed-citation>
            </ref>
            <ref id="B29">
                <label>29</label>
                <mixed-citation publication-type="webpage"><collab>Open Archives
                    Initiative</collab>. <year>2015</year>. <source>Open Archives Initiative
                        Protocol for Metadata Harvesting</source>. URL:
                        <uri>http://www.openarchives.org/pmh/</uri> (visited on May 25,
                    2020).</mixed-citation>
            </ref>
            <ref id="B30">
                <label>30</label>
                <mixed-citation publication-type="journal"><string-name><given-names>P. Bryan</given-names> <surname>Heidorn</surname></string-name>.
                        <year>2008</year>. <article-title>Shedding Light on the Dark Data in the
                        Long Tail of Science</article-title>. <source>Library Trends</source>,
                        <volume>57</volume>(<issue>2</issue>):
                        <fpage>280</fpage>&#8211;<lpage>299</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1353/lib.0.0036</pub-id></mixed-citation>
            </ref>
            <ref id="B31">
                <label>31</label>
                <mixed-citation publication-type="journal"><string-name><surname>Pampel</surname>,
                            <given-names>H</given-names></string-name>, et al. <month>Nov</month>.
                        <day>4</day>, <year>2013</year>. <article-title>Making Research Data
                        Repositories Visible: The <italic>re3data.org</italic>
                        Registry</article-title>. In: <string-name><surname>Suleman</surname>,
                            <given-names>H</given-names></string-name> (ed.), <source>PLoS
                        ONE</source>, <volume>8</volume>(<issue>11</issue>): <fpage>e78080</fpage>.
                    DOI: <pub-id pub-id-type="doi"
                    >10.1371/journal.pone.0078080</pub-id></mixed-citation>
            </ref>
            <ref id="B32">
                <label>32</label>
                <mixed-citation publication-type="webpage"><collab>PostgreSQL Global Development
                        Group</collab>. <year>1996</year>. <source>PostgreSQL: The World&#8217;s
                        Most Advanced Open Source Database</source>. URL:
                        <uri>https://www.postgresql.org/</uri> (visited on Sept. 30,
                    2020).</mixed-citation>
            </ref>
            <ref id="B33">
                <label>33</label>
                <mixed-citation publication-type="journal"><string-name><surname>Potthoff</surname>,
                            <given-names>J</given-names></string-name>, et al. <month>Mar</month>.
                        <year>2019</year>. <article-title>Procedures for Systematic Capture and
                        Management of Analytical Data in Academia</article-title>. In:
                        <source>Analytica Chimica Acta: X</source>,
                        <volume>1</volume>: <fpage>100007</fpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.acax.2019.100007</pub-id></mixed-citation>
            </ref>
            <ref id="B34">
                <label>34</label>
                <mixed-citation publication-type="journal"><string-name><surname>Rubacha</surname>,
                            <given-names>M</given-names></string-name>,
                            <string-name><surname>Rattan</surname>,
                        <given-names>AK</given-names></string-name> and
                            <string-name><surname>Hosselet</surname>,
                        <given-names>SC</given-names></string-name>. <month>Feb</month>.
                        <year>2011</year>. <article-title>A Review of Electronic Laboratory
                        Notebooks Available in the Market Today</article-title>. <source>Journal of
                        Laboratory Automation</source>, <volume>16</volume>(<issue>1</issue>):
                        <fpage>90</fpage>&#8211;<lpage>98</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1016/j.jala.2009.01.002</pub-id></mixed-citation>
            </ref>
            <ref id="B35">
                <label>35</label>
                <mixed-citation publication-type="journal"><string-name><surname>Sandfeld</surname>,
                            <given-names>S</given-names></string-name>, et al. <year>2018</year>.
                        <source>Strategiepapier &#8211; Digitale Transformation in der
                        Materialwissenschaft und Werkstofftechnik</source>.</mixed-citation>
            </ref>
            <ref id="B36">
                <label>36</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Schembera</surname>,
                        <given-names>B</given-names></string-name> and
                            <string-name><surname>Iglezakis</surname>,
                        <given-names>D</given-names></string-name>. <year>2020</year>.
                        <article-title>EngMeta: Metadata for Computational
                        Engineering</article-title>. <source>International Journal of Metadata,
                        Semantics and Ontologies</source>, <volume>14</volume>(<issue>1</issue>):
                        <fpage>26</fpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1504/IJMSO.2020.107792</pub-id></mixed-citation>
            </ref>
            <ref id="B37">
                <label>37</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Schindelin</surname>,
                            <given-names>J</given-names></string-name>, et al. <month>July</month>
                    <year>2015</year>. <article-title>The ImageJ Ecosystem: An Open Platform for
                        Biomedical Image Analysis</article-title>. <source>Molecular Reproduction
                        and Development</source>, <volume>82</volume>(<issue>7&#8211;8</issue>):
                        <fpage>518</fpage>&#8211;<lpage>529</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1002/mrd.22489</pub-id></mixed-citation>
            </ref>
            <ref id="B38">
                <label>38</label>
                <mixed-citation publication-type="book"><string-name><surname>Schoof</surname>,
                            <given-names>E</given-names></string-name> and
                            <string-name><surname>Brandt</surname>,
                        <given-names>N</given-names></string-name>. <month>Oct</month>.
                        <day>16</day>, <year>2020</year>. <source>IAM-CMS/Kadi-Apy: Kadi4Mat API
                        Library</source>. Version 0.2.1. <publisher-name>Zenodo</publisher-name>.
                    DOI: <pub-id pub-id-type="doi">10.5281/ZENODO.4088276</pub-id></mixed-citation>
            </ref>
            <ref id="B39">
                <label>39</label>
                <mixed-citation publication-type="webpage"><collab>SciNote LLC</collab>.
                    <year>2015</year>. <source>SciNote &#8211; Electronic Lab Notebook &amp;
                        Inventory Management</source>. URL: <uri>https://www.scinote.net/</uri>
                    (visited on May 21, 2020).</mixed-citation>
            </ref>
            <ref id="B40">
                <label>40</label>
                <mixed-citation publication-type="journal"><string-name><surname>Smith</surname>,
                            <given-names>M</given-names></string-name>, et al. <month>Jan</month>.
                        <year>2003</year>. <article-title>DSpace: An Open Source Dynamic Digital
                        Repository</article-title>. <source>D-Lib Magazine</source>,
                        <volume>9</volume>(<issue>1</issue>). DOI: <pub-id pub-id-type="doi"
                        >10.1045/january2003-smith</pub-id></mixed-citation>
            </ref>
            <ref id="B41">
                <label>41</label>
                <mixed-citation publication-type="journal"><string-name><surname>Taylor</surname>,
                            <given-names>KT</given-names></string-name>. <year>2006</year>.
                        <article-title>The Status of Electronic Laboratory Notebooks for Chemistry
                        and Biology</article-title>. <source>Current Opinion in Drug Discovery and
                        Development</source>, <volume>9</volume>(<issue>3</issue>):
                        <fpage>348</fpage>.</mixed-citation>
            </ref>
            <ref id="B42">
                <label>42</label>
                <mixed-citation publication-type="journal"><collab>The FAIRsharing
                        Community</collab>, et al. <month>Apr</month>. <year>2019</year>.
                        <article-title>FAIRsharing as a Community Approach to Standards,
                        Repositories and Policies</article-title>. <source>Nature
                        Biotechnology</source>, <volume>37</volume>(<issue>4</issue>):
                        <fpage>358</fpage>&#8211;<lpage>367</lpage>. DOI: <pub-id pub-id-type="doi"
                        >10.1038/s41587-019-0080-8</pub-id></mixed-citation>
            </ref>
            <ref id="B43">
                <label>43</label>
                <mixed-citation publication-type="webpage"><collab>The MathWorks, Inc</collab>.
                        <year>2021</year>. <source>MATLAB &#8211; MathWorks</source>. URL:
                        <uri>https://www.mathworks.com/products/matlab.html</uri> (visited on Jan.
                    19, 2021).</mixed-citation>
            </ref>
            <ref id="B44">
                <label>44</label>
                <mixed-citation publication-type="webpage"><collab>The Pallets Projects</collab>.
                        <year>2015</year>. <source>Flask &#8211; The Pallets Projects</source>. URL:
                        <uri>https://palletsprojects.com/p/flask/</uri> (visited on May 25,
                    2020).</mixed-citation>
            </ref>
            <ref id="B45">
                <label>45</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Tremouilhac</surname>,
                            <given-names>P</given-names></string-name>, et al. <month>Dec</month>.
                        <year>2017</year>. <article-title>Chemotion ELN: An Open Source Electronic
                        Lab Notebook for Chemists in Academia</article-title>. <source>Journal of
                        Cheminformatics</source>, <volume>9</volume>(<issue>1</issue>). DOI: <pub-id
                        pub-id-type="doi">10.1186/s13321-017-0240-0</pub-id></mixed-citation>
            </ref>
            <ref id="B46">
                <label>46</label>
                <mixed-citation publication-type="webpage"><collab>Vue Core Development
                        Team</collab>. <year>2014</year>. <source>Vue.js &#8211; The Progressive
                        JavaScript Framework</source>. URL: <uri>https://vuejs.org/</uri> (visited
                    on May 25, 2020).</mixed-citation>
            </ref>
            <ref id="B47">
                <label>47</label>
                <mixed-citation publication-type="journal"
                        ><string-name><surname>Wilkinson</surname>,
                        <given-names>MD</given-names></string-name>, et al. <month>Dec</month>.
                        <year>2016</year>. <article-title>The FAIR Guiding Principles for Scientific
                        Data Management and Stewardship</article-title>. <source>Scientific
                        Data</source>, <volume>3</volume>(<issue>1</issue>). DOI: <pub-id
                        pub-id-type="doi">10.1038/sdata.2016.18</pub-id></mixed-citation>
            </ref>
            <ref id="B48">
                <label>48</label>
                <mixed-citation publication-type="journal"
                            ><string-name><surname>Wolstencroft</surname>,
                            <given-names>K</given-names></string-name>, et al. <month>July</month>
                    <day>1</day>, <year>2013</year>. <article-title>The Taverna Workflow Suite:
                        Designing and Executing Workflows of Web Services on the Desktop, Web or in
                        the Cloud</article-title>. <source>Nucleic Acids Research</source>,
                        <volume>41</volume>(<issue>W1</issue>):
                        <fpage>W557</fpage>&#8211;<lpage>W561</lpage>. DOI: <pub-id
                        pub-id-type="doi">10.1093/nar/gkt328</pub-id></mixed-citation>
            </ref>
            <ref id="B49">
                <label>49</label>
                <mixed-citation publication-type="journal"><string-name><surname>Zschumme</surname>,
                            <given-names>P</given-names></string-name>. <month>Jan</month>.
                        <day>15</day>, <year>2021a</year>.
                        <article-title>IAM-CMS/Process-Engine</article-title>. Version 0.1.0.
                        <source>Zenodo</source>. DOI: <pub-id pub-id-type="doi"
                        >10.5281/ZENODO.4442563</pub-id></mixed-citation>
            </ref>
            <ref id="B50">
                <label>50</label>
                <mixed-citation publication-type="journal"><string-name><surname>Zschumme</surname>,
                            <given-names>P</given-names></string-name>. <month>Jan</month>.
                        <day>15</day>, <year>2021b</year>.
                        <article-title>IAM-CMS/Process-Manager</article-title>. Version 0.1.0.
                        <source>Zenodo</source>. DOI: <pub-id pub-id-type="doi"
                        >10.5281/ZENODO.4442553</pub-id></mixed-citation>
            </ref>
            <ref id="B51">
                <label>51</label>
                <mixed-citation publication-type="journal"><string-name><surname>Zschumme</surname>,
                            <given-names>P</given-names></string-name>, et al. <month>Oct</month>.
                        <day>16</day>, <year>2020</year>.
                        <article-title>IAM-CMS/Workflow-Nodes</article-title>. Version 0.1.0.
                        <source>Zenodo</source>. DOI: <pub-id pub-id-type="doi"
                        >10.5281/ZENODO.4094719</pub-id></mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
