<?xml version="1.0" encoding="utf-8"?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" dtd-version="1.4" article-type="research-article">
  <front>
    <journal-meta>
      <journal-id journal-id-type="issn">1991-6639</journal-id>
      <journal-id journal-id-type="eissn">2949-1940</journal-id>
      <journal-title-group>
        <journal-title xml:lang="ru">Известия Кабардино-Балкарского научного центра РАН</journal-title>
        <journal-title xml:lang="en">NEWS OF THE KABARDINO-BALKARIAN SCIENTIFIC CENTER OF RAS</journal-title>
      </journal-title-group>
      <publisher>
        <publisher-name>КБНЦ РАН</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.35330/1991-6639-2026-28-2-34-50</article-id>
      <article-id pub-id-type="edn">ISSQXZ</article-id>
      <article-id pub-id-type="uri">https://izvestiyakbncran.ru/index.php/28-2-3/</article-id>
      <article-categories>
        <subj-group>
          <subject>ИНФОРМАТИКА И ИНФОРМАЦИОННЫЕ ПРОЦЕССЫ</subject>
        </subj-group>
        <subj-group>
          <subject>INFORMATICS AND INFORMATION PROCESSES</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title xml:lang="ru">Архитектура распределенной системы хранения и обработки больших данных на основе Apache Ozone и Argo Workflows</article-title>
        <trans-title-group xml:lang="en">
          <trans-title>Architecture of a distributed storage and big data processing system based on Apache Ozone and Argo Workflows</trans-title>
        </trans-title-group>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <name name-style="eastern">
            <surname>Полянцева</surname>
            <given-names>Ксения Андреевна</given-names>
          </name>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Полянцева</surname>
              <given-names>Ксения Андреевна</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Polyantseva</surname>
              <given-names>Ksenia A.</given-names>
            </name>
          </name-alternatives>
          <email>k.a.poliantseva@mtuci.ru</email>
          <contrib-id contrib-id-type="orcid">0000-0002-7102-4208</contrib-id>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
        <contrib contrib-type="author">
          <name name-style="eastern">
            <surname>Комлев</surname>
            <given-names>Артем Владимирович</given-names>
          </name>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Комлев</surname>
              <given-names>Артем Владимирович</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Komlev</surname>
              <given-names>Artem V.</given-names>
            </name>
          </name-alternatives>
          <email>komlev1257@gmail.com</email>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
        <contrib contrib-type="author">
          <name name-style="eastern">
            <surname>Городничев</surname>
            <given-names>Михаил Геннадьевич</given-names>
          </name>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Городничев</surname>
              <given-names>Михаил Геннадьевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Gorodnichev</surname>
              <given-names>Mikhail G.</given-names>
            </name>
          </name-alternatives>
          <email>m.g.gorodnichev@mtuci.ru</email>
          <contrib-id contrib-id-type="orcid">0000-0003-1739-9831</contrib-id>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
        <aff-alternatives id="aff1">
          <aff>
            <institution xml:lang="ru">Московский технический университет связи и информатики (Москва, Россия)</institution>
          </aff>
          <aff>
            <institution xml:lang="en">Moscow Technical University of Communications and Informatics (Moscow, Russia)</institution>
          </aff>
        </aff-alternatives>
      </contrib-group>
      <pub-date pub-type="epub" iso-8601-date="2026-04-30">
        <day>30</day>
        <month>04</month>
        <year>2026</year>
      </pub-date>
      <pub-date date-type="collection">
        <year>2026</year>
      </pub-date>
      <volume>28</volume>
      <issue>2</issue>
      <fpage>34</fpage>
      <lpage>50</lpage>
      <history>
        <date date-type="received" iso-8601-date="2026-02-25">
          <day>25</day>
          <month>02</month>
          <year>2026</year>
        </date>
        <date date-type="accepted" iso-8601-date="2026-03-25">
          <day>25</day>
          <month>03</month>
          <year>2026</year>
        </date>
        <date date-type="rev-recd" iso-8601-date="2026-03-11">
          <day>11</day>
          <month>03</month>
          <year>2026</year>
        </date>
      </history>
      <permissions>
        <copyright-statement>Полянцева К. А., Комлев А. В., Городничев М. Г.</copyright-statement>
        <copyright-year>2026</copyright-year>
        <copyright-holder xml:lang="ru">Полянцева К. А., Комлев А. В., Городничев М. Г.</copyright-holder>
        <copyright-holder xml:lang="en">Polyantseva K.A., Komlev A.V., Gorodnichev M.G.</copyright-holder>
        <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>CC BY 4.0</license-p>
        </license>
      </permissions>
      <self-uri xlink:type="simple" xlink:href="https://izvestiyakbncran.ru/index.php/28-2-3/">https://izvestiyakbncran.ru/index.php/28-2-3/</self-uri>
      <abstract xml:lang="ru">
        <p>В статье рассматривается архитектура распределенной системы хранения и обработки больших данных, построенная на основе интеграции объектного хранилища Apache Ozone и системы оркестрации вычислительных процессов Argo Workflows. Цель исследования. Разработка и исследование архитектуры распределенной системы хранения и обработки больших данных, основанной на интеграции Apache Ozone и Argo Workflows, реализующей принцип разделения функций хранения и вычислений, а также оценка эффективности предложенного решения по сравнению с традиционной архитектурой Apache Hadoop. Методы исследования. Использованы методы системного анализа архитектур больших данных, сравнительного экспериментального тестирования распределенных систем хранения и обработки информации, а также методы математического моделирования для формализации процессов масштабирования ресурсов, времени выполнения вычислений и эффективности хранения данных. Экспериментальная оценка проводилась на кластерах Apache Ozone и Apache Hadoop с использованием Apache Spark для выполнения вычислительных задач. Результаты. Разработана архитектура распределенной системы, обеспечивающая независимое масштабирование подсистем хранения и вычислений за счет использования объектного хранилища Apache Ozone и оркестрации вычислительных процессов на базе Argo Workflows в контейнерной среде Kubernetes. Предложена методика интеграции компонентов без использования промежуточного S3-шлюза, позволяющая снизить накладные расходы взаимодействия. Проведенные экспериментальные исследования показали сопоставимую производительность предложенного решения с Hadoop-кластером при операциях чтения, записи и обработки данных, а также преимущества в гибкости масштабирования и эффективности использования дискового пространства при применении erasure coding. Выводы. 
Результаты исследования подтверждают перспективность использования архитектуры на основе Apache Ozone и Argo Workflows в качестве альтернативы традиционным платформам обработки больших данных. Раздельная архитектура хранения и вычислений позволяет повысить гибкость инфраструктуры, оптимизировать использование ресурсов и снизить затраты на хранение данных при сохранении сопоставимого уровня производительности. Предложенный подход может быть применен при построении корпоративных аналитических платформ, систем обработки больших данных и инфраструктур машинного обучения.</p>
      </abstract>
      <trans-abstract xml:lang="en">
        <p>The article discusses the architecture of a distributed big data storage and processing system based on the integration of the Apache Ozone object storage and the Argo Workflows computing process orchestration system. Aim. Development and research of the architecture of a distributed big data storage and processing system based on the integration of Apache Ozone and Argo Workflows, implementing the principle of separation of storage and computing functions, as well as evaluating the effectiveness of the proposed solution compared to the traditional Apache Hadoop architecture. Methods. Methods of system analysis of big data architectures, comparative experimental testing of distributed information storage and processing systems, as well as mathematical modeling methods are used to formalize the processes of scaling resources, computing time, and data storage efficiency. The experimental evaluation is carried out on Apache Ozone and Apache Hadoop clusters using Apache Spark to perform computational tasks. Results. A distributed system architecture has been developed that provides independent scaling of storage and computing subsystems through the use of Apache Ozone object storage and orchestration of computing processes based on Argo Workflows in the Kubernetes container environment. A method for integrating components without using an intermediate S3 gateway is proposed, which reduces the overhead costs of interaction. Experimental studies have shown comparable performance of the proposed solution with a Hadoop cluster for data reading, writing, and processing, as well as advantages in scaling flexibility and disk space efficiency when using erasure coding. Conclusions. The results of the study confirm the prospects of using architecture based on Apache Ozone and Argo Workflows as an alternative to traditional big data platforms. 
The separate storage and computing architecture allows for increased infrastructure flexibility, optimized resource usage, and lower data storage costs while maintaining comparable performance levels. The proposed approach can be applied in the construction of corporate analytical platforms, big data processing systems and machine learning infrastructures.</p>
      </trans-abstract>
      <kwd-group xml:lang="ru">
        <title>Ключевые слова</title>
        <kwd>распределенные системы хранения данных</kwd>
        <kwd>большие данные</kwd>
        <kwd>Apache Ozone</kwd>
        <kwd>Argo Workflows</kwd>
        <kwd>Kubernetes</kwd>
        <kwd>Apache Spark</kwd>
        <kwd>объектные хранилища</kwd>
        <kwd>разделение хранения и вычислений</kwd>
        <kwd>масштабируемость</kwd>
        <kwd>обработка данных</kwd>
        <kwd>контейнерные вычисления</kwd>
        <kwd>отказоустойчивость</kwd>
      </kwd-group>
      <kwd-group xml:lang="en">
        <title>Keywords</title>
        <kwd>distributed storage systems</kwd>
        <kwd>big data</kwd>
        <kwd>Apache Ozone</kwd>
        <kwd>Argo Workflows</kwd>
        <kwd>Kubernetes</kwd>
        <kwd>Apache Spark</kwd>
        <kwd>object storage</kwd>
        <kwd>separation of storage and computing</kwd>
        <kwd>scalability</kwd>
        <kwd>data processing</kwd>
        <kwd>container computing</kwd>
        <kwd>fault tolerance</kwd>
      </kwd-group>
      <funding-group>
        <funding-statement xml:lang="ru">Исследование проведено без спонсорской поддержки.</funding-statement>
        <funding-statement xml:lang="en">The study was performed without external funding.</funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <body/>
  <back>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <mixed-citation xml:lang="ru">Полянцева К. А. Высоконагруженная платформа для агрегации и анализа неструктурированных данных о состоянии дорожного полотна // Автоматизация в промышленности. 2022. № 5. С. 32–37. DOI: 10.25728/avtprom.2022.05.09</mixed-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <mixed-citation xml:lang="ru">Городничев М. Г., Титов Д. В., Липатова А. Д. О задаче построения независимых архитектур обработки данных в интеллектуальных транспортных системах // Инженерный вестник Дона. 2025. № 11(131). С. 62–92.</mixed-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <mixed-citation xml:lang="ru">Malik V. Hadoop Distributed file system (HDFS) with its architecture. International Journal for Research in Applied Science and Engineering Technology. 2025. Vol. 13. Pp. 6031–6034. DOI: 10.22214/ijraset.2025.71584</mixed-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <mixed-citation xml:lang="ru">Kala Karun A., Chitharanjan K. A review on Hadoop – HDFS infrastructure extensions. 2013 IEEE Conference on Information &amp; Communication Technologies, Thuckalay, India. 2013. Pp. 132–137. DOI: 10.1109/CICT.2013.6558077</mixed-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <mixed-citation xml:lang="ru">Zhu Z., Tan L., Li Y., Ji C. PHDFS: Optimizing I/O performance of HDFS in deep learning cloud computing platform. Journal of Systems Architecture. 2020. Vol. 109. Article 101810. DOI: 10.1016/j.sysarc.2020.101810</mixed-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <mixed-citation xml:lang="ru">Иевлев К. О., Городничев М. Г. Сравнительный анализ систем хранения данных HDFS и Apache Ozone // Computational Nanotechnology. 2025. Т. 12. № 1. С. 26–33. DOI: 10.33693/2313-223X-2025-12-1-26-33</mixed-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <mixed-citation xml:lang="ru">Wilkinson S. R., Aloqalaa M., Belhajjame K. et al. Applying the FAIR principles to computational workflows. Scientific Data. 2025. Vol. 12. Article 328. DOI: 10.1038/s41597-025-04451-9</mixed-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <mixed-citation xml:lang="ru">Gustafsson O.J.R., Wilkinson S.R., Bacall F. et al. WorkflowHub: a registry for computational workflows. Scientific Data. 2025. Vol. 12. Article 837. DOI: 10.1038/s41597-025-04786-3</mixed-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <mixed-citation xml:lang="ru">Tourouta E., Gorodnichev M., Polyantseva K., Moseva M. Providing fault tolerance of cluster computing systems based on fault-tolerant dynamic computation planning. Lecture Notes in Information Systems and Organisation: 3rd. Virtual, Online, 2022. Pp. 143–150. DOI: 10.1007/978-3-030-94252-6_10</mixed-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <mixed-citation xml:lang="ru">Kumar B., Verma A., Verma P. Introduction of kubernetes. Modern kubernetes: From core concepts to intelligent autoscaling for cloud applications. Cham: Springer, 2026. Pp. 1–15. (Studies in Autonomic, Data-driven and Industrial Computing). DOI: 10.1007/978-3-032-12972-7_1</mixed-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <mixed-citation xml:lang="ru">Aqasizade H., Ataie E., Bastam M. Kubernetes in action: Exploring the performance of Kubernetes distributions in the cloud. Software: Practice and Experience. 2025. Vol. 55. Pp. 1711–1725. DOI: 10.1002/spe.70000</mixed-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <mixed-citation xml:lang="ru">Lucani D., Feher M. HyRES: A hybrid replication and erasure coding approach to data storage. 2025. 14 p. arXiv: 2511.00896. URL: https://arxiv.org/abs/2511.00896 (accessed: 22/02/2026)</mixed-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <mixed-citation xml:lang="ru">Shen Z., Cai Y., Cheng K., Lee P. P. C., Li X., Hu Y., Shu J. A survey of the past, present, and future of erasure coding for storage systems. ACM Transactions on Storage. 2025. Vol. 21. No. 1. Article 4. 39 p. DOI: 10.1145/3708994</mixed-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <mixed-citation xml:lang="ru">Ibrahim S., Darrous J. Erasure coding aware block placement for data-intensive applications. ACM SIGOPS Operating Systems Review. 2025. Vol. 59. No. 1. Pp. 62–69. DOI: 10.1145/3759441.3759451</mixed-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <mixed-citation xml:lang="en">Polyantseva K. A. High-load platform for aggregation and analysis of unstructured data on road surface condition. Avtomatizatsiya v promyshlennosti [Automation in Industry]. 2022. No. 5. Pp. 32–37. DOI: 10.25728/avtprom.2022.05.09. (In Russian)</mixed-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <mixed-citation xml:lang="en">Gorodnichev M.G., Titov D.V., Lipatova A.D. On problem of constructing independent data processing architectures in intelligent transport systems. Inzhenernyy vestnik Dona [Engineering Bulletin of the Don]. 2025. No. 11(131). Pp. 62–92. (In Russian)</mixed-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <mixed-citation xml:lang="en">Malik V. Hadoop Distributed file system (HDFS) with its architecture. International Journal for Research in Applied Science and Engineering Technology. 2025. Vol. 13. Pp. 6031–6034. DOI: 10.22214/ijraset.2025.71584</mixed-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <mixed-citation xml:lang="en">Kala Karun A., Chitharanjan K. A review on Hadoop – HDFS infrastructure extensions. 2013 IEEE Conference on Information &amp; Communication Technologies, Thuckalay, India. 2013. Pp. 132–137. DOI: 10.1109/CICT.2013.6558077</mixed-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <mixed-citation xml:lang="en">Zhu Z., Tan L., Li Y., Ji C. PHDFS: Optimizing I/O performance of HDFS in deep learning cloud computing platform. Journal of Systems Architecture. 2020. Vol. 109. Article 101810. DOI: 10.1016/j.sysarc.2020.101810</mixed-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <mixed-citation xml:lang="en">Ievlev K.O., Gorodnichev M.G. Comparative analysis of HDFS and Apache Ozone data storage systems. Computational Nanotechnology. 2025. Vol. 12. No. 1. Pp. 26–33. DOI: 10.33693/2313-223X-2025-12-1-26-33. (In Russian)</mixed-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <mixed-citation xml:lang="en">Wilkinson S. R., Aloqalaa M., Belhajjame K. et al. Applying the FAIR principles to computational workflows. Scientific Data. 2025. Vol. 12. Article 328. DOI: 10.1038/s41597-025-04451-9</mixed-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <mixed-citation xml:lang="en">Gustafsson O.J.R., Wilkinson S.R., Bacall F. et al. WorkflowHub: a registry for computational workflows. Scientific Data. 2025. Vol. 12. Article 837. DOI: 10.1038/s41597-025-04786-3</mixed-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <mixed-citation xml:lang="en">Tourouta E., Gorodnichev M., Polyantseva K., Moseva M. Providing fault tolerance of cluster computing systems based on fault-tolerant dynamic computation planning. Lecture Notes in Information Systems and Organisation: 3rd. Virtual, Online, 2022. Pp. 143–150. DOI: 10.1007/978-3-030-94252-6_10</mixed-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <mixed-citation xml:lang="en">Kumar B., Verma A., Verma P. Introduction of kubernetes. Modern kubernetes: From core concepts to intelligent autoscaling for cloud applications. Cham: Springer, 2026. Pp. 1–15. (Studies in Autonomic, Data-driven and Industrial Computing). DOI: 10.1007/978-3-032-12972-7_1</mixed-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <mixed-citation xml:lang="en">Aqasizade H., Ataie E., Bastam M. Kubernetes in action: Exploring the performance of Kubernetes distributions in the cloud. Software: Practice and Experience. 2025. Vol. 55. Pp. 1711–1725. DOI: 10.1002/spe.70000</mixed-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <mixed-citation xml:lang="en">Lucani D., Feher M. HyRES: A hybrid replication and erasure coding approach to data storage. 2025. 14 p. arXiv: 2511.00896. URL: https://arxiv.org/abs/2511.00896 (accessed: 22/02/2026)</mixed-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <mixed-citation xml:lang="en">Shen Z., Cai Y., Cheng K., Lee P. P. C., Li X., Hu Y., Shu J. A survey of the past, present, and future of erasure coding for storage systems. ACM Transactions on Storage. 2025. Vol. 21. No. 1. Article 4. 39 p. DOI: 10.1145/3708994</mixed-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <mixed-citation xml:lang="en">Ibrahim S., Darrous J. Erasure coding aware block placement for data-intensive applications. ACM SIGOPS Operating Systems Review. 2025. Vol. 59. No. 1. Pp. 62–69. DOI: 10.1145/3759441.3759451</mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>
