7500 hotels price research: scraping using JavaScript module hotels-scraper-js and data analysis
About
This demo project is a practical showcase of using hotels-scraper-js
NPM package plus how extracted data could be used in exploratory data analysis.
hotels-scraper-js
is an open-source tool designed to scrape popular hotel websites from a single module. It is used to scrape hotel lists and hotel info from the Airbnb, Booking, and Hotels.com websites.
You can download the dataset at Kaggle: 7500 hotels from Airbnb, Booking and Hotels.com.
I'll cover:
Getting data for 500 hotels (per website) in each city from the list (I use 5 European capitals for this example).
Getting hotels with the lowest, median, and highest prices.
Saving parsed data in
.json
files.
Saving chart images with the analyzed data.
Full code (if you don't need an explanation)
import fs from "fs/promises";
import { ChartJSNodeCanvas } from "chartjs-node-canvas";
import { airbnb, booking, hotelsCom, saveToJSON } from "hotels-scraper-js";
// Cities to research. getResults and the analysis loop iterate this list, and it is also passed to saveChart as the chart labels.
const locations = ["London", "Berlin", "Paris", "Madrid", "Rome"];
/**
 * Fetches hotel lists for one city from Airbnb, Booking and Hotels.com.
 *
 * The three scrapers are queried one after another, not in parallel.
 *
 * @param {string} location - City name passed as the last argument to every scraper.
 * @returns {Promise<{airbnbHotels: *, bookingHotels: *, hotelsComHotels: *}>}
 *   Whatever each scraper's getHotels() resolved with, keyed by website.
 */
const getHotels = async (location) => {
  const currency = "USD";
  const resultsLimit = 500;

  const fromAirbnb = await airbnb.getHotels(undefined, currency, resultsLimit, location);
  const fromBooking = await booking.getHotels(undefined, currency, resultsLimit, location);
  // NOTE(review): "United States" presumably selects the Hotels.com country/site,
  // which would determine the currency - confirm against the hotels-scraper-js docs.
  const fromHotelsCom = await hotelsCom.getHotels(
    undefined,
    undefined,
    undefined,
    "United States",
    undefined,
    resultsLimit,
    location
  );

  return {
    airbnbHotels: fromAirbnb,
    bookingHotels: fromBooking,
    hotelsComHotels: fromHotelsCom,
  };
};
/**
 * Builds a map of city name -> hotel lists for every entry in `locations`.
 *
 * Cities are processed sequentially. As written (option 1), each city is
 * scraped with getHotels() and handed to saveToJSON() under `data/<city>`.
 * Option 2 (commented out) reads the previously saved file instead.
 *
 * @returns {Promise<Object<string, *>>} Object keyed by city name.
 */
const getResults = async () => {
  const results = {};
  for (const location of locations) {
    // There are two options (only one can be uncommented):
    // 1. If you need to get and save parsed data into .json files:
    const hotels = await getHotels(location);
    saveToJSON(hotels, `data/${location}`);
    // Kept inside option 1 so that switching to option 2 does not overwrite
    // the parsed JSON with the raw file contents.
    results[location] = hotels;
    // 2. If you saved files earlier and now you need to read data from these files:
    // const hotels = await fs.readFile(`data/${location}.json`, "utf-8");
    // results[location] = JSON.parse(hotels);
  }
  return results;
};
/**
 * Renders a bar chart and writes it to `<chartName>.png`.
 *
 * The returned promise settles only after the write attempt has finished, so
 * `await saveChart(...)` really waits for the file. A failed write is logged
 * and not rethrown (best effort); a failed render rejects the promise.
 *
 * @param {number[]} chartData - One bar value per label.
 * @param {string[]} labels - X-axis labels, in the same order as chartData.
 * @param {string} chartName - Output path without the ".png" extension.
 * @param {string} title - Title displayed on the chart.
 * @returns {Promise<void>}
 */
const saveChart = async (chartData, labels, chartName, title) => {
  const width = 500;
  const height = 500;
  const backgroundColour = "white";
  const chartJSNodeCanvas = new ChartJSNodeCanvas({ width, height, backgroundColour });
  const configuration = {
    type: "bar",
    data: {
      labels,
      datasets: [
        {
          data: chartData,
          backgroundColor: [
            "rgba(255, 99, 132, 0.2)",
            "rgba(255, 159, 64, 0.2)",
            "rgba(75, 192, 192, 0.2)",
            "rgba(54, 162, 235, 0.2)",
            "rgba(153, 102, 255, 0.2)",
          ],
          borderColor: ["rgb(255, 99, 132)", "rgb(255, 159, 64)", "rgb(75, 192, 192)", "rgb(54, 162, 235)", "rgb(153, 102, 255)"],
          borderWidth: 1,
        },
      ],
    },
    options: {
      scales: {
        y: {
          title: {
            display: true,
            text: "USD",
          },
        },
      },
      plugins: {
        title: {
          display: true,
          text: title,
        },
        legend: {
          display: false,
        },
      },
    },
  };
  const base64Image = await chartJSNodeCanvas.renderToDataURL(configuration);
  // Strip the data-URL prefix so only the base64 payload is decoded into the file.
  const base64Data = base64Image.replace(/^data:image\/png;base64,/, "");
  try {
    // Awaited on purpose: a floating write would let callers continue (or the
    // process exit) before the PNG is on disk.
    await fs.writeFile(`${chartName}.png`, base64Data, "base64");
    console.log("Chart saved!");
  } catch (error) {
    console.log(error);
  }
};
// Entry point: once all hotel lists are available, compute the lowest, highest
// and median price per website and city, then save one chart per metric/website.
getResults().then(async (results) => {
  // Prices of a hotel list in ascending order.
  // Assumes every hotel exposes a numeric `price.value` - TODO confirm.
  const sortedPricesOf = (hotels) => hotels.map((el) => el.price.value).sort((a, b) => a - b);

  // Median of an ascending-sorted list: the middle element for odd lengths,
  // the mean of the two middle elements for even lengths. The index is floored
  // because `length / 2` is fractional for odd lengths and would otherwise
  // index `undefined` (yielding NaN).
  const medianOf = (sortedPrices) => {
    const middle = Math.floor(sortedPrices.length / 2);
    return sortedPrices.length % 2 === 0
      ? (sortedPrices[middle - 1] + sortedPrices[middle]) / 2
      : sortedPrices[middle];
  };

  // One entry per city, in the same order as `locations`.
  const airbnbLowest = [];
  const bookingLowest = [];
  const hotelsComLowest = [];
  const airbnbHighest = [];
  const bookingHighest = [];
  const hotelsComHighest = [];
  const airbnbMedian = [];
  const bookingMedian = [];
  const hotelsComMedian = [];

  for (const location of locations) {
    const { airbnbHotels, bookingHotels, hotelsComHotels } = results[location];
    const airbnbPrices = sortedPricesOf(airbnbHotels);
    const bookingPrices = sortedPricesOf(bookingHotels);
    const hotelsComPrices = sortedPricesOf(hotelsComHotels);

    airbnbLowest.push(airbnbPrices[0]);
    bookingLowest.push(bookingPrices[0]);
    hotelsComLowest.push(hotelsComPrices[0]);

    airbnbHighest.push(airbnbPrices[airbnbPrices.length - 1]);
    bookingHighest.push(bookingPrices[bookingPrices.length - 1]);
    hotelsComHighest.push(hotelsComPrices[hotelsComPrices.length - 1]);

    airbnbMedian.push(medianOf(airbnbPrices));
    bookingMedian.push(medianOf(bookingPrices));
    hotelsComMedian.push(medianOf(hotelsComPrices));
  }

  await saveChart(airbnbLowest, locations, `charts/airbnbLowest`, "Airbnb Lowest Prices");
  await saveChart(bookingLowest, locations, `charts/bookingLowest`, "Booking Lowest Prices");
  await saveChart(hotelsComLowest, locations, `charts/hotelsComLowest`, "Hotels.com Lowest Prices");
  await saveChart(airbnbHighest, locations, `charts/airbnbHighest`, "Airbnb Highest Prices");
  await saveChart(bookingHighest, locations, `charts/bookingHighest`, "Booking Highest Prices");
  await saveChart(hotelsComHighest, locations, `charts/hotelsComHighest`, "Hotels.com Highest Prices");
  await saveChart(airbnbMedian, locations, `charts/airbnbMedian`, "Airbnb Median Prices");
  await saveChart(bookingMedian, locations, `charts/bookingMedian`, "Booking Median Prices");
  await saveChart(hotelsComMedian, locations, `charts/hotelsComMedian`, "Hotels.com Median Prices");
});
Code explanation
Preparation
First, we need to create a Node.js* project and add npm
packages hotels-scraper-js
to parse hotel lists, chart.js
to build charts from the received data, and chartjs-node-canvas
to render charts with Chart.js using canvas.
To do this, in the directory with our project, open the command line and enter:
$ npm init -y
And then:
$ npm i hotels-scraper-js chart.js chartjs-node-canvas
*If you don't have Node.js installed, you can download it from nodejs.org and follow the installation documentation.
Process
Note: Only the ES modules import statement is supported.
First, we need to import the required modules and define the locations
variable with the cities in which we need to search for hotels:
import fs from "fs/promises";
import { ChartJSNodeCanvas } from "chartjs-node-canvas";
import { airbnb, booking, hotelsCom, saveToJSON } from "hotels-scraper-js";
// Cities in which hotels are searched.
const locations = ["London", "Berlin", "Paris", "Madrid", "Rome"];
Then we write getHotels
function that takes location
and returns hotel lists from each website:
/**
 * Fetches hotel lists for one city from Airbnb, Booking and Hotels.com.
 *
 * The scrapers are awaited one at a time, in that order.
 *
 * @param {string} location - City name passed as the last argument to every scraper.
 * @returns {Promise<{airbnbHotels: *, bookingHotels: *, hotelsComHotels: *}>}
 *   The value each scraper's getHotels() resolved with, keyed by website.
 */
const getHotels = async (location) => {
  const currency = "USD";
  const resultsLimit = 500;

  const fromAirbnb = await airbnb.getHotels(undefined, currency, resultsLimit, location);
  const fromBooking = await booking.getHotels(undefined, currency, resultsLimit, location);
  // NOTE(review): "United States" presumably selects the Hotels.com country/site - confirm.
  const fromHotelsCom = await hotelsCom.getHotels(
    undefined,
    undefined,
    undefined,
    "United States",
    undefined,
    resultsLimit,
    location
  );

  return {
    airbnbHotels: fromAirbnb,
    bookingHotels: fromBooking,
    hotelsComHotels: fromHotelsCom,
  };
};
In this function, we define currency ("USD") and set the number of results (500) for each parser.
Next, we write getResults
function that handles what we want: it either gets the results from the parsers, saves them, and returns them, or reads previously saved data from files and returns it:
/**
 * Gathers the hotel lists of every city in `locations` into one object.
 *
 * Cities are handled one at a time. In the active variant each city is scraped
 * with getHotels() and passed to saveToJSON() under `data/<city>`.
 *
 * @returns {Promise<Object<string, *>>} Hotel lists keyed by city name.
 */
const getResults = async () => {
  const hotelsByCity = {};
  for (let index = 0; index < locations.length; index += 1) {
    const city = locations[index];
    // Pick exactly one of the two variants below.
    // Variant 1 - scrape the data and store it in a .json file:
    const cityHotels = await getHotels(city);
    saveToJSON(cityHotels, `data/${city}`);
    hotelsByCity[`${city}`] = cityHotels;
    // Variant 2 - reuse the files stored by an earlier run:
    // const cityHotels = await fs.readFile(`data/${city}.json`, "utf-8");
    // hotelsByCity[`${city}`] = JSON.parse(cityHotels);
  }
  return hotelsByCity;
};
Next, we write saveChart
function that builds and saves chart images:
/**
 * Renders a bar chart and writes it to `<chartName>.png`.
 *
 * The returned promise settles only after the write attempt has finished, so
 * `await saveChart(...)` really waits for the file. A failed write is logged
 * and not rethrown (best effort); a failed render rejects the promise.
 *
 * @param {number[]} chartData - One bar value per label.
 * @param {string[]} labels - X-axis labels, in the same order as chartData.
 * @param {string} chartName - Output path without the ".png" extension.
 * @param {string} title - Title displayed on the chart.
 * @returns {Promise<void>}
 */
const saveChart = async (chartData, labels, chartName, title) => {
  const width = 500;
  const height = 500;
  const backgroundColour = "white";
  const chartJSNodeCanvas = new ChartJSNodeCanvas({ width, height, backgroundColour });
  const configuration = {
    type: "bar",
    data: {
      labels,
      datasets: [
        {
          data: chartData,
          backgroundColor: [
            "rgba(255, 99, 132, 0.2)",
            "rgba(255, 159, 64, 0.2)",
            "rgba(75, 192, 192, 0.2)",
            "rgba(54, 162, 235, 0.2)",
            "rgba(153, 102, 255, 0.2)",
          ],
          borderColor: ["rgb(255, 99, 132)", "rgb(255, 159, 64)", "rgb(75, 192, 192)", "rgb(54, 162, 235)", "rgb(153, 102, 255)"],
          borderWidth: 1,
        },
      ],
    },
    options: {
      scales: {
        y: {
          title: {
            display: true,
            text: "USD",
          },
        },
      },
      plugins: {
        title: {
          display: true,
          text: title,
        },
        legend: {
          display: false,
        },
      },
    },
  };
  const base64Image = await chartJSNodeCanvas.renderToDataURL(configuration);
  // Strip the data-URL prefix so only the base64 payload is decoded into the file.
  const base64Data = base64Image.replace(/^data:image\/png;base64,/, "");
  try {
    // Awaited on purpose: a floating write would let callers continue (or the
    // process exit) before the PNG is on disk.
    await fs.writeFile(`${chartName}.png`, base64Data, "base64");
    console.log("Chart saved!");
  } catch (error) {
    console.log(error);
  }
};
And finally, we call getResults
function, wait for results, make some data operations and save charts:
// Outline only: the callback body is elided here.
getResults().then(async (results) => {
...
});
In this function first, we declare analyzed results arrays:
// Accumulators for the analyzed prices, one array per website and metric
// (lowest, highest, median).
const airbnbLowest = [];
const bookingLowest = [];
const hotelsComLowest = [];
const airbnbHighest = [];
const bookingHighest = [];
const hotelsComHighest = [];
const airbnbMedian = [];
const bookingMedian = [];
const hotelsComMedian = [];
Then we need to fill each array with the necessary data. We sort the prices in ascending order: the lowest price is the first element and the highest is the last. For the median we take the middle element (for odd-length arrays) or the average of the two middle elements (for even-length arrays):
// Median of an ascending-sorted list: the middle element for odd lengths, the
// mean of the two middle elements for even lengths. The index is floored
// because `length / 2` is fractional for odd lengths and would otherwise index
// `undefined` (yielding NaN).
const medianOf = (sortedPrices) => {
  const middle = Math.floor(sortedPrices.length / 2);
  return sortedPrices.length % 2 === 0
    ? (sortedPrices[middle - 1] + sortedPrices[middle]) / 2
    : sortedPrices[middle];
};

// Appends one value per city to each accumulator, in `locations` order.
for (const location of locations) {
  // Prices in ascending order, so index 0 is the lowest and the last index the highest.
  const airbnbPrices = results[location].airbnbHotels.map((el) => el.price.value).sort((a, b) => a - b);
  const bookingPrices = results[location].bookingHotels.map((el) => el.price.value).sort((a, b) => a - b);
  const hotelsComPrices = results[location].hotelsComHotels.map((el) => el.price.value).sort((a, b) => a - b);

  airbnbLowest.push(airbnbPrices[0]);
  bookingLowest.push(bookingPrices[0]);
  hotelsComLowest.push(hotelsComPrices[0]);

  airbnbHighest.push(airbnbPrices[airbnbPrices.length - 1]);
  bookingHighest.push(bookingPrices[bookingPrices.length - 1]);
  hotelsComHighest.push(hotelsComPrices[hotelsComPrices.length - 1]);

  airbnbMedian.push(medianOf(airbnbPrices));
  bookingMedian.push(medianOf(bookingPrices));
  hotelsComMedian.push(medianOf(hotelsComPrices));
}
After all the data operations, we save the chart images:
// One chart per website and metric, rendered one after another.
// Arguments: values per city, city labels, output path (saveChart appends ".png"), chart title.
await saveChart(airbnbLowest, locations, `charts/airbnbLowest`, "Airbnb Lowest Prices");
await saveChart(bookingLowest, locations, `charts/bookingLowest`, "Booking Lowest Prices");
await saveChart(hotelsComLowest, locations, `charts/hotelsComLowest`, "Hotels.com Lowest Prices");
await saveChart(airbnbHighest, locations, `charts/airbnbHighest`, "Airbnb Highest Prices");
await saveChart(bookingHighest, locations, `charts/bookingHighest`, "Booking Highest Prices");
await saveChart(hotelsComHighest, locations, `charts/hotelsComHighest`, "Hotels.com Highest Prices");
await saveChart(airbnbMedian, locations, `charts/airbnbMedian`, "Airbnb Median Prices");
await saveChart(bookingMedian, locations, `charts/bookingMedian`, "Booking Median Prices");
await saveChart(hotelsComMedian, locations, `charts/hotelsComMedian`, "Hotels.com Median Prices");
Results
Lowest price:
Airbnb
Booking
Hotels.com
Highest price:
Airbnb
Booking
Hotels.com
Median price:
Airbnb
Booking
Hotels.com
Links
If you want other functionality added to this demo project or if you want to see some other projects made with SerpApi, write me a message.
Add a Feature Request💫 or a Bug🐞
Subscribe to my newsletter
Read articles from Mikhail Zub directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by