Faker.js to Populate Your Database with Realistic Test Data

Kam Chehresa
5 min read

Introduction

Faker.js is a powerful library that generates large amounts of realistic fake data for testing and development purposes. When building applications, having realistic test data is crucial for proper development and testing.

Github repository: https://github.com/KazChe/fakerjs-ramblings

Key Features and Benefits

  • Generates realistic data across many categories (names, emails, addresses, etc.)

  • Supports multiple locales for internationalized data

  • Highly customizable and extensible

  • Perfect for seeding development databases

  • Helps avoid manual data entry for testing

Common Use Cases

// Generate user data: a single record whose fields are each filled by one Faker generator call
const user = {
  name: faker.person.fullName(),
  email: faker.internet.email(),
  avatar: faker.image.avatar(),
  address: faker.location.streetAddress(),
  bio: faker.lorem.paragraph()
};

// Generate product data: a single record built entirely from the faker.commerce generators
const product = {
  name: faker.commerce.productName(),
  price: faker.commerce.price(),
  description: faker.commerce.productDescription(),
  category: faker.commerce.department()
};

Integration with Database Seeding

/**
 * Builds one fake row for the `users` table.
 *
 * `updatedAt` is drawn first and then passed as the reference date for
 * `createdAt`. Drawing the two timestamps independently occasionally yields a
 * row that claims to have been updated before it was created.
 *
 * @returns {object} Row with firstName, lastName, email, createdAt, updatedAt.
 */
const createFakeUser = () => {
  const firstName = faker.person.firstName();
  const lastName = faker.person.lastName();
  const email = faker.internet.email();

  const updatedAt = faker.date.recent();
  // Anchor the "past" draw to updatedAt so createdAt always precedes it
  const createdAt = faker.date.past({ refDate: updatedAt });

  return { firstName, lastName, email, createdAt, updatedAt };
};

// Generate 1000 users
exports.seed = async function(knex) {
  const fakeUsers = Array.from({ length: 1000 }, createFakeUser);
  await knex('users').insert(fakeUsers);
};

Best Practices

  1. Seed Data Consistency: Use fixed seeds for reproducible results
faker.seed(123); // Fixed seed: reruns generate the same sequence of fake values
  2. Relationships: Maintain referential integrity when seeding related tables

     // Generate the key once and reuse it on both sides of the relation
     const userId = faker.string.uuid();
     const user = { id: userId, name: faker.person.fullName() };
     // Reusing the same userId is what keeps the post linked to the user
     const userPost = { title: faker.lorem.sentence(), userId };
    
  3. Locale Support: Use appropriate locales for international data
// faker.setLocale() was removed in Faker v8; each locale now ships as its own
// ready-made instance exported by '@faker-js/faker' (fakerES, fakerDE, ...).
const spanishName = fakerES.person.fullName(); // Spanish data
const germanName = fakerDE.person.fullName(); // German data

Advanced Database Seeding Patterns

1. Relationships and Foreign Keys

// Builds one company plus a randomly sized staff list (5-20 people); every
// employee carries the company's id so the two tables stay consistent.
const createCompanyWithEmployees = () => {
  // Generated once and shared: this is the foreign key on every employee
  const companyId = faker.string.uuid();

  const company = {
    id: companyId,
    name: faker.company.name(),
    catchPhrase: faker.company.catchPhrase(),
    industry: faker.company.buzzNoun(),
  };

  const headcount = faker.number.int({ min: 5, max: 20 });
  const employees = [];
  while (employees.length < headcount) {
    employees.push({
      id: faker.string.uuid(),
      companyId,
      firstName: faker.person.firstName(),
      lastName: faker.person.lastName(),
      role: faker.person.jobTitle(),
      department: faker.commerce.department(),
      salary: faker.number.int({ min: 30000, max: 150000 }),
    });
  }

  return { company, employees };
};

2. Custom Generators for Domain-Specific Data

// Domain-specific generators layered on top of Faker's helpers
const customGenerator = {
  // Picks one of the four workflow stages via faker.helpers.arrayElement
  projectStatus() {
    const stages = ['PLANNING', 'IN_PROGRESS', 'REVIEW', 'COMPLETED'];
    return faker.helpers.arrayElement(stages);
  },

  // Produces a label of the form "SP-<n>" with n drawn from 1..999
  sprintNumber() {
    const sequence = faker.number.int({ min: 1, max: 999 });
    return `SP-${sequence}`;
  },

  // Picks a priority using the weights listed next to each value
  ticketPriority() {
    const weightedPriorities = [
      ['CRITICAL', 0.1],
      ['HIGH', 0.2],
      ['MEDIUM', 0.4],
      ['LOW', 0.3],
    ].map(([value, weight]) => ({ weight, value }));

    return faker.helpers.weightedArrayElement(weightedPriorities);
  },
};

3. Batch Processing for Large Datasets

/**
 * Seeds `large_table` batch by batch: each batch is generated, inserted and
 * awaited before the next one is built.
 *
 * @param {Function} knex - Knex instance, called as `knex('large_table')`.
 * @param {number} [batchSize=1000] - Maximum rows per insert; positive integer.
 * @param {number} [totalRecords=1000000] - Total rows to create; non-negative integer.
 * @returns {Promise<void>}
 * @throws {RangeError} If batchSize or totalRecords is out of range.
 */
async function seedLargeDataset(knex, batchSize = 1000, totalRecords = 1000000) {
  // A zero/negative/fractional batch size would give an infinite or wrong batch count
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }
  if (!Number.isInteger(totalRecords) || totalRecords < 0) {
    throw new RangeError(`totalRecords must be a non-negative integer, got ${totalRecords}`);
  }

  const batches = Math.ceil(totalRecords / batchSize);

  console.log(`Seeding ${totalRecords} records in ${batches} batches`);

  for (let i = 0; i < batches; i++) {
    // The last batch only holds the remainder, so exactly totalRecords rows
    // are inserted even when batchSize does not divide the total evenly
    const rowsInBatch = Math.min(batchSize, totalRecords - i * batchSize);

    const records = Array.from({ length: rowsInBatch }, () => ({
      id: faker.string.uuid(),
      // NOTE(review): createFakeData is not defined in this snippet; it must be
      // supplied by the surrounding module as a zero-argument factory
      data: faker.helpers.multiple(createFakeData, { count: 5 })
    }));

    await knex('large_table').insert(records);
    console.log(`Completed batch ${i + 1}/${batches}`);
  }
}

4. Locale-Aware Seeding

/**
 * Inserts 100 users per locale into `international_users`, generating each
 * group with the Faker instance that belongs to that locale.
 *
 * @param {Function} knex - Knex instance, called as `knex('international_users')`.
 * @returns {Promise<void>}
 */
const seedInternationalUsers = async (knex) => {
  const locales = ['en', 'es', 'fr', 'de', 'ja'];

  for (const locale of locales) {
    // faker.setLocale() was removed in Faker v8. `allFakers` (exported by
    // '@faker-js/faker') maps each locale code to a ready-made instance.
    const localeFaker = allFakers[locale];

    const users = Array.from({ length: 100 }, () => ({
      name: localeFaker.person.fullName(),
      address: localeFaker.location.streetAddress(),
      city: localeFaker.location.city(),
      phone: localeFaker.phone.number(),
      locale
    }));

    await knex('international_users').insert(users);
  }
};

5. Consistent Test Data

// Returns a single test user generated right after re-seeding Faker with 123,
// so the same values come back on every call.
function setupTestData() {
  // Re-seed first: everything generated below depends on this starting point
  faker.seed(123);

  const id = faker.string.uuid();
  const email = faker.internet.email();
  const username = faker.internet.userName();

  const profile = {
    avatar: faker.image.avatar(),
    bio: faker.person.bio(),
    location: faker.location.city(),
  };

  return { id, email, username, profile };
}

Basic Setup

import { allFakers, faker, fakerDE, fakerES } from '@faker-js/faker';

// Optional: set a fixed seed so every run generates the same sequence of values
faker.seed(123);

More Advanced Data Generation Examples

1. Detailed User Profile

// Builds a nested user profile. The first/last name are generated once and
// fed into the email and username generators so all three agree.
const createUserProfile = () => {
  const firstName = faker.person.firstName();
  const lastName = faker.person.lastName();

  const id = faker.string.uuid();
  const email = faker.internet.email({ firstName, lastName });
  const avatar = faker.image.avatar();

  const job = {
    title: faker.person.jobTitle(),
    area: faker.person.jobArea(),
    type: faker.person.jobType(),
    descriptor: faker.person.jobDescriptor(),
  };

  const street = faker.location.streetAddress();
  const city = faker.location.city();
  const state = faker.location.state();
  const zipCode = faker.location.zipCode();
  const coordinates = {
    lat: faker.location.latitude(),
    lng: faker.location.longitude(),
  };
  const address = { street, city, state, zipCode, coordinates };

  const internet = {
    username: faker.internet.userName({ firstName, lastName }),
    password: faker.internet.password(),
    userAgent: faker.internet.userAgent(),
  };

  return { id, firstName, lastName, email, avatar, job, address, internet };
};

2. E-commerce Product Data

/**
 * Builds one fake e-commerce product with three images and stock metadata.
 *
 * Two consistency rules are enforced on the generated data:
 * - exactly one image (the first) is flagged as primary; a random boolean per
 *   image could leave a product with no primary image or with several;
 * - `createdAt` is generated relative to `updatedAt`, so a product is never
 *   "updated" before it was created.
 *
 * @returns {object} Product with id, name, description, price, category,
 *   images and metadata.
 */
const createProduct = () => {
  const updatedAt = faker.date.recent();
  // Anchor the "past" draw to updatedAt so createdAt always precedes it
  const createdAt = faker.date.past({ refDate: updatedAt });

  return {
    id: faker.string.uuid(),
    name: faker.commerce.productName(),
    description: faker.commerce.productDescription(),
    price: faker.commerce.price(),
    category: faker.commerce.department(),
    images: Array.from({ length: 3 }, (_, index) => ({
      url: faker.image.url(),
      alt: faker.lorem.sentence(),
      isPrimary: index === 0
    })),
    metadata: {
      createdAt,
      updatedAt,
      stock: faker.number.int({ min: 0, max: 1000 }),
      sku: faker.string.alphanumeric(8).toUpperCase()
    }
  };
};

3. Company Data with Business Logic

// Builds one company record: identity, a buzzword description, an embedded
// list of 5-20 employee profiles, contact details and a headquarters address.
const createCompany = () => {
  const id = faker.string.uuid();
  const name = faker.company.name();
  const catchPhrase = faker.company.catchPhrase();

  // Description is three buzzword fragments joined by single spaces
  const buzzPhrase = faker.company.buzzPhrase();
  const buzzVerb = faker.company.buzzVerb();
  const buzzAdjective = faker.company.buzzAdjective();
  const description = `${buzzPhrase} ${buzzVerb} ${buzzAdjective}`;

  const teamSize = faker.number.int({ min: 5, max: 20 });
  const employees = Array.from({ length: teamSize }, createUserProfile);

  const contacts = {
    email: faker.internet.email(),
    phone: faker.phone.number(),
    website: faker.internet.url(),
  };

  const headquarters = faker.location.streetAddress(true);
  const coordinates = {
    lat: faker.location.latitude(),
    lng: faker.location.longitude(),
  };

  return {
    id,
    name,
    catchPhrase,
    description,
    employees,
    contacts,
    address: { headquarters, coordinates },
  };
};

4. Database Seeding with Relationships

// Seeds three related tables in dependency order: companies first, then the
// users and products that reference a company id.
// NOTE(review): rows from createCompany() are inserted as-is, including their
// nested fields; confirm the `companies` schema accepts them.
async function seedDatabase(knex) {
  // Fixed seed so repeated runs generate the same data
  faker.seed(123);

  // Parent rows go in first; the child rows below point at their ids
  const companies = Array.from({ length: 10 }, createCompany);
  await knex('companies').insert(companies);

  // 5-20 users per company, each tagged with that company's id
  const users = [];
  for (const company of companies) {
    const userCount = faker.number.int({ min: 5, max: 20 });
    for (let n = 0; n < userCount; n += 1) {
      users.push({ ...createUserProfile(), companyId: company.id });
    }
  }
  await knex('users').insert(users);

  // 10-50 products per company, linked through manufacturerId
  const products = [];
  for (const company of companies) {
    const productCount = faker.number.int({ min: 10, max: 50 });
    for (let n = 0; n < productCount; n += 1) {
      products.push({ ...createProduct(), manufacturerId: company.id });
    }
  }
  await knex('products').insert(products);
}

Performance Considerations

  • Use batch inserts for large datasets

  • Consider using streams for very large datasets

  • Cache repeated random generations

  • Use faker.helpers.multiple() for generating arrays of data

Real Example - See GitHub Repository

src/
├── config/
│   ├── database.js                   # Database configuration
│   └── schema.sql                    # SQL schema definitions
├── models/
│   ├── index.js                      # Model relationships
│   ├── User.js                       # User model
│   ├── Company.js                    # Company model
│   └── Product.js                    # Product model
├── seeds/
│   ├── seed-database.js              # Main seeder
│   ├── seed-large-dataset.js         # Large dataset seeder
│   └── seed-international-users.js   # International seeder
└── scripts/
    └── init-db.js                    # Database initialization
0
Subscribe to my newsletter

Read articles from Kam Chehresa directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Kam Chehresa
Kam Chehresa

All opinions are my own.