RepoFlow Team · May 24, 2025

Mirror the Entire RubyGems Repository

Learn how to mirror the entire RubyGems repository to your local environment using a simple script. Perfect for offline or secure setups.

If you manage systems in isolated or secure environments, such as air-gapped networks or internal infrastructure, you may need full access to the RubyGems registry without relying on the public internet. This guide explains how to mirror the entire RubyGems repository locally using a simple and efficient script.

This solution is ideal for organizations that want full control over their Ruby dependencies or need to ensure availability regardless of external factors.
Prerequisites
You will need:
  1. Node.js 18 or higher
  2. A machine with enough storage space (the full RubyGems repository is large)
  3. An internet connection for the initial sync
Project Setup
Create a new folder and add the following package.json:
{
  "name": "mirror-ruby-gems",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "license": "MIT",
  "dependencies": {
    "axios": "^1.7.9",
    "cli-progress": "^3.12.0",
    "mkdirp": "^3.0.1",
    "p-limit": "^3.1.0"
  }
}
Install the dependencies:
npm install
The Mirroring Script
Create a new file named index.js in the root of your project and paste the following code inside:
const fs = require("fs");
const path = require("path");
const axios = require("axios");
const cliProgress = require("cli-progress");
const { mkdirp } = require("mkdirp");
const pLimit = require("p-limit");

// Remote registry and how many gem downloads may be in flight at once.
const BASE_URL = "https://rubygems.org";
const CONCURRENT_DOWNLOADS = 5;

// Local layout: .gem files go to GEM_STORAGE_PATH; JSON bookkeeping
// (gem list, per-gem version lists, download ledger) goes to CACHE_PATH.
const GEM_STORAGE_PATH = "./gems";
const CACHE_PATH = "./cache";
const DOWNLOAD_CACHE_FILE = path.join(CACHE_PATH, "downloaded_gems.json");

// Make sure both directories exist before anything reads from or writes to them.
for (const directory of [GEM_STORAGE_PATH, CACHE_PATH]) {
  mkdirp.sync(directory);
}

/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * Best-effort by design: if the file cannot be read or does not contain
 * valid JSON, an empty object is returned instead of throwing.
 *
 * @param {string} filePath - Path of the JSON file to load.
 * @returns {*} The parsed value, or `{}` on any read/parse failure.
 */
const readJSON = (filePath) => {
  let parsed = {};
  try {
    const rawText = fs.readFileSync(filePath, "utf8");
    parsed = JSON.parse(rawText);
  } catch {
    // Intentionally ignored: callers treat a missing or broken file as "no data".
  }
  return parsed;
};

/**
 * Serialize `data` as pretty-printed JSON (2-space indent) and store it at
 * `filePath`.
 *
 * The JSON is written to a sibling `<filePath>.tmp` file first and then
 * renamed over the destination. If the process is interrupted mid-write,
 * the previous file is left intact rather than truncated; a truncated file
 * would otherwise be read back as `{}` by readJSON and lose the saved state.
 *
 * @param {string} filePath - Destination path of the JSON file.
 * @param {*} data - Any JSON-serializable value.
 * @returns {void}
 */
const writeJSON = (filePath, data) => {
  const tempPath = `${filePath}.tmp`;
  fs.writeFileSync(tempPath, JSON.stringify(data, null, 2));
  fs.renameSync(tempPath, filePath);
};

// Download ledger loaded from DOWNLOAD_CACHE_FILE at startup; readJSON yields {}
// when that file is missing or unparsable. downloadGem consults it to skip gems
// and sets downloadedGems[<gem file name>] = true after each completed download.
const downloadedGems = readJSON(DOWNLOAD_CACHE_FILE);

/**
 * Return the names of all gems published on the registry at BASE_URL.
 *
 * The list is cached in `<CACHE_PATH>/all_gems.json`; a usable cached list is
 * returned without touching the network. Otherwise the list is downloaded,
 * cached, and returned.
 *
 * @returns {Promise<string[]>} Gem names.
 * @throws Propagates the axios error if the request fails.
 */
async function fetchAllGems() {
  const cacheFile = path.join(CACHE_PATH, "all_gems.json");
  if (fs.existsSync(cacheFile)) {
    const cached = readJSON(cacheFile);
    // readJSON falls back to {} for a corrupt file; only trust a real,
    // non-empty list so a bad cache triggers a refetch instead of a crash.
    if (Array.isArray(cached) && cached.length > 0) return cached;
  }

  console.log("Fetching all gem names...");
  // /api/v1/gems.json only lists gems owned by the authenticated user (and
  // requires an API key). The compact index /names endpoint is the public
  // full list: plain text, a "---" header line, then one gem name per line.
  const response = await axios.get(`${BASE_URL}/names`, { responseType: "text" });
  const gemNames = String(response.data)
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line !== "" && line !== "---");
  writeJSON(cacheFile, gemNames);
  return gemNames;
}

/**
 * Return the downloadable version identifiers of one gem.
 *
 * Each identifier is the part of the gem file name between `<gemName>-` and
 * `.gem`: the plain version number for pure-Ruby releases, or
 * `<number>-<platform>` for platform-specific builds (e.g.
 * `1.15.0-x86_64-linux`). Results are cached in
 * `<CACHE_PATH>/versions_<gemName>.json`.
 *
 * @param {string} gemName - Name of the gem to look up.
 * @returns {Promise<string[]>} Unique version identifiers; `[]` if the lookup
 *   fails (the failure is logged as a warning, not thrown).
 */
async function fetchGemVersions(gemName) {
  const cacheFile = path.join(CACHE_PATH, `versions_${gemName}.json`);
  if (fs.existsSync(cacheFile)) {
    const cached = readJSON(cacheFile);
    // readJSON returns {} for a corrupt file; refetch in that case. Dedupe so
    // older caches with repeated numbers cannot trigger parallel writes to
    // the same gem file.
    if (Array.isArray(cached)) return [...new Set(cached)];
  }

  try {
    const response = await axios.get(
      `${BASE_URL}/api/v1/versions/${encodeURIComponent(gemName)}.json`
    );
    // The API returns one entry per (number, platform) pair. Non-"ruby"
    // platforms are separate files named <gem>-<number>-<platform>.gem, so
    // the platform must be part of the identifier or those builds are missed.
    const identifiers = response.data.map((v) =>
      v.platform && v.platform !== "ruby" ? `${v.number}-${v.platform}` : v.number
    );
    const versions = [...new Set(identifiers)];
    writeJSON(cacheFile, versions);
    return versions;
  } catch (err) {
    console.warn(`Failed to fetch versions for ${gemName}: ${err.message}`);
    return [];
  }
}

/**
 * Download one gem file into GEM_STORAGE_PATH unless it is already present.
 *
 * The gem is skipped when the target file exists or the download ledger
 * (`downloadedGems`) already lists it. Data is streamed to a `.part` file and
 * renamed to its final name only after the stream finishes, so a failed or
 * interrupted download never leaves a truncated `.gem` that a later run would
 * mistake for a complete one.
 *
 * @param {string} gemName - Name of the gem.
 * @param {string} version - Version identifier as it appears in the file name.
 * @returns {Promise<void>} Resolves on success, skip, or failure; failures
 *   are logged rather than thrown.
 */
async function downloadGem(gemName, version) {
  const gemFileName = `${gemName}-${version}.gem`;
  const gemFilePath = path.join(GEM_STORAGE_PATH, gemFileName);
  const partialPath = `${gemFilePath}.part`;
  const gemUrl = `${BASE_URL}/downloads/${gemFileName}`;

  if (fs.existsSync(gemFilePath) || downloadedGems[gemFileName]) return;

  try {
    const response = await axios({ url: gemUrl, method: "GET", responseType: "stream" });
    const writer = fs.createWriteStream(partialPath);
    await new Promise((resolve, reject) => {
      const fail = (err) => {
        response.data.destroy();
        writer.destroy();
        reject(err);
      };
      // pipe() does not forward source errors to the destination; without
      // this listener a dropped connection would leave the promise pending.
      response.data.on("error", fail);
      writer.on("error", fail);
      writer.on("finish", resolve);
      response.data.pipe(writer);
    });
    fs.renameSync(partialPath, gemFilePath);
    downloadedGems[gemFileName] = true;
    writeJSON(DOWNLOAD_CACHE_FILE, downloadedGems);
  } catch (err) {
    try {
      fs.rmSync(partialPath, { force: true });
    } catch {
      // Best-effort cleanup: a leftover .part file is harmless because the
      // next attempt truncates and overwrites it.
    }
    console.error(`Failed to download ${gemFileName}: ${err.message}`);
  }
}

/**
 * Mirror every gem returned by fetchAllGems.
 *
 * Gems are processed one after another; within a gem, its versions are
 * downloaded in parallel, capped at CONCURRENT_DOWNLOADS. A progress bar
 * advances by one per gem processed.
 *
 * @returns {Promise<void>}
 */
async function downloadAllGems() {
  const allGemNames = await fetchAllGems();
  const throttle = pLimit(CONCURRENT_DOWNLOADS);
  const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);

  // Fetch one gem's version list, then download all of its versions through
  // the shared concurrency limiter.
  const mirrorGem = async (name) => {
    const versions = await fetchGemVersions(name);
    const pending = versions.map((version) =>
      throttle(() => downloadGem(name, version))
    );
    await Promise.all(pending);
  };

  bar.start(allGemNames.length, 0);

  for (const name of allGemNames) {
    await mirrorGem(name);
    bar.increment();
  }

  bar.stop();
  console.log("All gems mirrored successfully.");
}

// Entry point: run the mirror. On an unexpected failure, log it and mark the
// process as failed so shells, cron jobs and CI can detect the error
// (previously the script exited with status 0).
downloadAllGems().catch((err) => {
  console.error("Unexpected error:", err);
  process.exitCode = 1;
});
Once the file is created and dependencies are installed, run the script with:
node index.js
    Notes
    1. This script mirrors every version of every gem
    2. You can modify it to only download the latest version of each gem if preferred
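    3. A local cache (the JSON files in ./cache, together with the .gem files already saved in ./gems) lets the script resume where it left off if the process is interrupted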
    4. The script uses concurrency to speed up the process while being respectful of the RubyGems servers
    Conclusion
    This script gives you full control over the RubyGems ecosystem in your own environment. It is suitable for high-security networks, internal developer infrastructure, or anyone looking to reduce external dependencies.
    If you need to mirror other ecosystems such as npm, PyPI, Maven, or Docker, reach out to us at hello@repoflow.io. We would be happy to help.