Back to roadmaps regex Course

Project: Parsing Nginx Access Logs with Node.js Regex

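In this project, we will write a Node.js script that reads an Nginx access log file line by line and uses a regex with named capture groups to extract structured data from each entry: the client IP address, HTTP method, request path, HTTP status code, and response size in bytes.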


1. Understanding the Nginx Log Format

A typical Nginx combined access log line looks like:

192.168.1.45 - - [16/Jun/2026:14:35:22 +0800] "GET /api/users HTTP/1.1" 200 1024 "-" "Mozilla/5.0"

2. Building the Log Parser

Create parse-logs.js:

// parse-logs.js
const fs = require('fs');
const readline = require('readline');

// Pattern for one Nginx combined-format access log line. Named groups capture
// the leading address token (ip), the request method and path, the three-digit
// status code, and the byte count that follows it; the two fields after the
// address, the bracketed timestamp, and the protocol token are matched but not
// captured.
const LOG_REGEX = /^(?<ip>\S+)\s+\S+\s+\S+\s+\[.+?\]\s+"(?<method>\w+)\s+(?<path>\S+)\s+\S+"\s+(?<status>\d{3})\s+(?<bytes>\d+)/;

/**
 * Reads a log file line by line and collects the fields of every line that
 * matches LOG_REGEX. Lines that do not match are skipped.
 *
 * @param {string} logFilePath - Path of the access log file to read.
 * @returns {Promise<Array<{ip: string, method: string, path: string, status: number, bytes: number}>>}
 *   One record per matching line, in file order; `status` and `bytes` are
 *   parsed as base-10 integers.
 */
async function parseLogs(logFilePath) {
  const entries = [];
  const lineReader = readline.createInterface({
    input: fs.createReadStream(logFilePath),
  });

  for await (const rawLine of lineReader) {
    const parsed = LOG_REGEX.exec(rawLine);
    if (parsed === null) {
      // Not a recognizable access log entry; ignore it.
      continue;
    }

    const { ip, method, path, status, bytes } = parsed.groups;
    entries.push({
      ip,
      method,
      path,
      status: Number.parseInt(status, 10),
      bytes: Number.parseInt(bytes, 10),
    });
  }

  return entries;
}

// Analysis: count error responses (status >= 400) and report the five request
// paths that produced the most of them.
parseLogs('./access.log')
  .then((logs) => {
    const errors = logs.filter((log) => log.status >= 400);

    // Paths come straight from untrusted request lines, so tally them in a Map.
    // A plain object would mis-count keys such as "constructor" or "__proto__",
    // which collide with properties inherited from Object.prototype.
    const errorCountByPath = new Map();
    for (const { path } of errors) {
      errorCountByPath.set(path, (errorCountByPath.get(path) || 0) + 1);
    }

    // [path, count] pairs, most frequent first, limited to the top five.
    const topErrorPaths = [...errorCountByPath]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5);

    console.log(`Total requests: ${logs.length}`);
    console.log(`Error responses (4xx/5xx): ${errors.length}`);
    console.log('Top error paths:', topErrorPaths);
  })
  .catch((err) => {
    // Without a rejection handler, a missing or unreadable log file would
    // surface as an unhandled promise rejection instead of a clear message.
    console.error(`Failed to analyze access log: ${err.message}`);
    process.exitCode = 1;
  });
Published on Last updated: