<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  RSS 2.0 feed for the "data-processing" tag listing on www.pistack.xyz.
  The Atom namespace is bound to the "atom" prefix on the root element; in the
  part of the feed shown here it is used only for the self-referencing
  atom:link inside the channel.
-->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: human-readable title, the HTML page this feed mirrors, and a summary. -->
    <title>Data-Processing on Pi Stack</title>
    <link>https://www.pistack.xyz/tags/data-processing/</link>
    <description>Recent content in Data-Processing on Pi Stack</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <!-- Same timestamp as the pubDate of the item below; dates are written in the RFC 822 style (day, date, time, numeric offset). -->
    <lastBuildDate>Sat, 09 May 2026 00:00:00 +0000</lastBuildDate>
    <!-- rel="self" gives the URL of this feed document itself (index.xml under the tag page URL). -->
    <atom:link href="https://www.pistack.xyz/tags/data-processing/index.xml" rel="self" type="application/rss+xml" />
    <!-- One entry per post; only a single post is present in this feed as shown. -->
    <item>
      <title>Self-Hosted Batch Processing: Apache Spark vs Hadoop MapReduce vs Apache Tez (2026)</title>
      <link>https://www.pistack.xyz/posts/2026-05-09-self-hosted-batch-processing-spark-mapreduce-tez-guide/</link>
      <pubDate>Sat, 09 May 2026 00:00:00 +0000</pubDate>
      <!-- The guid repeats the post URL from <link>; no isPermaLink attribute is set. -->
      <guid>https://www.pistack.xyz/posts/2026-05-09-self-hosted-batch-processing-spark-mapreduce-tez-guide/</guid>
      <!-- The body is HTML carried as entity-escaped text (a single <p> element), not as child XML elements. -->
      <description>&lt;p&gt;Processing large-scale data in batch mode remains a foundational requirement for data engineering pipelines. Whether you are running ETL jobs, building data warehouses, training machine learning models, or generating nightly reports, choosing the right batch processing engine impacts cost, performance, and operational complexity.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
