<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Web-Scraping on Pi Stack</title>
    <link>https://www.pistack.xyz/tags/web-scraping/</link>
    <description>Recent content in Web-Scraping on Pi Stack</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <lastBuildDate>Tue, 05 May 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://www.pistack.xyz/tags/web-scraping/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Gerapy vs Scrapyd vs Portia: Self-Hosted Web Scraping Management Platforms 2026</title>
      <link>https://www.pistack.xyz/posts/2026-05-05-gerapy-vs-scrapyd-vs-portia-self-hosted-web-scraping-management/</link>
      <pubDate>Tue, 05 May 2026 00:00:00 +0000</pubDate>
      <guid>https://www.pistack.xyz/posts/2026-05-05-gerapy-vs-scrapyd-vs-portia-self-hosted-web-scraping-management/</guid>
      <description>&lt;p&gt;Web scraping at a single-project scale is straightforward: write a Scrapy spider, run it from the command line, collect the results. But when you need to manage dozens of spiders across multiple projects, schedule recurring crawls, monitor execution status, and scale across distributed workers, a management platform becomes essential. Self-hosted scraping management gives you full control over crawl schedules, data storage, proxy rotation, and rate limiting — without depending on expensive cloud scraping services.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
