Puppeteer Login Automation: How to Retain User Login Session
- Author: Ashik Nesin (@AshikNesin)
Lately, I've been working on a personal finance app that needs to get data from a web app which doesn't provide an official API.
So I decided to automate it.
The workflow is simple:
- Login
- Go to the dashboard
- Wait for a particular API endpoint to load
- Then, scrape the data from it (save it as CSV, send it to a webhook, etc.); a minimal sketch of this flow follows the list.
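Here's what that original flow might look like in Puppeteer. This is just a sketch: the login URL, form selectors, credentials, and the /api/transactions endpoint are hypothetical placeholders for whatever your target app actually uses.
// original-workflow.js (sketch only; URLs, selectors, and the API endpoint are hypothetical)
import puppeteer from 'puppeteer';
import fs from 'fs/promises';

const run = async () => {
  const browser = await puppeteer.launch({ headless: true });
  const page = await browser.newPage();

  // Login (selectors and credentials are placeholders)
  await page.goto('https://example-app.com/login');
  await page.type('#email', process.env.APP_EMAIL);
  await page.type('#password', process.env.APP_PASSWORD);
  await Promise.all([page.waitForNavigation(), page.click('button[type="submit"]')]);

  // Go to the dashboard and wait for a particular API endpoint to load
  const [response] = await Promise.all([
    page.waitForResponse((res) => res.url().includes('/api/transactions')),
    page.goto('https://example-app.com/dashboard'),
  ]);

  // Scrape the data and save it (naive CSV, assuming the endpoint returns an array of flat objects)
  const rows = await response.json();
  const csv = rows.map((row) => Object.values(row).join(',')).join('\n');
  await fs.writeFile('transactions.csv', csv);

  await browser.close();
};

run();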
It worked as expected. However, every time I ran the script I had to log in to that app again and again.
That complicates things and could potentially raise alarms on their end because of the repeated logins.
So I made a little tweak to that workflow: check if login is needed, and only log in if it is.
We can accomplish this by storing the cookies and other session data from the website and then restoring them before running our script, just like it happens in a real browser.
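For illustration, a login check can be as simple as visiting a page that requires authentication and seeing whether you get redirected. The dashboard URL and redirect behaviour below are assumptions; use whatever reliably signals a signed-in state in your target app.
// is-logged-in.js (hypothetical helper, assuming the app redirects logged-out users to /login)
export const isLoggedIn = async (page) => {
  await page.goto('https://example-app.com/dashboard', { waitUntil: 'networkidle2' });
  return !page.url().includes('/login');
};
The main script can call this first and only run the login steps when it returns false.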
Here's how I did it:
For context, I'm using Supabase Storage for storing and retrieving the data. You could replace it with anything else, like AWS S3.
Here's the updated workflow; each step is covered in detail below.
- Check if a Puppeteer user data directory is available in Storage
- If it's available, download it and then unzip it to a temporary directory
- Configure the temp directory as the userDataDir in the Puppeteer launch configuration
- Execute your custom Puppeteer script
- Zip the user data and store it.
Let's get started with the code
Dependencies
npm i @supabase/supabase-js adm-zip tempy puppeteer
And make sure to configure SUPABASE_URL and SUPABASE_PRIVATE_KEY as environment variables.
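Optionally, you can fail fast when the credentials are missing. This is just a small sanity check, assuming the variables are exported in your shell (or loaded with something like dotenv before the script runs):
// env-check.js (optional sketch)
for (const name of ['SUPABASE_URL', 'SUPABASE_PRIVATE_KEY']) {
  if (!process.env[name]) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
}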
Snippet
Supabase helper
// supabaseAdminClient.js
import { createClient } from '@supabase/supabase-js';
const SUPABASE_URL = process.env.SUPABASE_URL;
const SUPABASE_PRIVATE_KEY = process.env.SUPABASE_PRIVATE_KEY;
const supabaseAdmin = createClient(SUPABASE_URL, SUPABASE_PRIVATE_KEY);
export default supabaseAdmin;
Upload and Download file
// storage.js
import fs from 'fs/promises';
import supabaseAdmin from './supabaseAdminClient';
export const uploadFile = async (bucketName, filePath, fileData, options) => {
const { error } = await supabaseAdmin.storage
.from(bucketName)
.upload(filePath, fileData, options);
if (error) {
throw error;
}
console.log(`File uploaded to ${filePath}`);
};
export const downloadFile = async (bucketName, filePath, downloadFilePath) => {
const { data, error } = await supabaseAdmin.storage
.from(bucketName)
.download(filePath);
if (error) {
throw error;
}
const buffer = Buffer.from(await data.arrayBuffer());
await fs.writeFile(downloadFilePath, buffer);
console.log(`File downloaded to ${downloadFilePath}`);
};
export const checkFileExists = async (bucketName, filePath) => {
  // Assumes filePath lives at the bucket root: list the root and match by name
  const { data, error } = await supabaseAdmin.storage
    .from(bucketName)
    .list()
if (error) {
console.error(error)
return false
}
const files = data.filter(item => item.name === filePath)
return files.length > 0
};
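For reference, here's a small sketch of how these helpers can be exercised on their own. The bucket and file names match the ones used later in this post, and the bucket is assumed to already exist in your Supabase project.
// storage-usage.js (sketch)
import { checkFileExists, downloadFile } from './storage';

const demo = async () => {
  const bucketName = 'example-browser-data'; // assumed to already exist
  const filePath = 'example-app.zip';
  if (await checkFileExists(bucketName, filePath)) {
    await downloadFile(bucketName, filePath, '/tmp/example-app.zip');
  } else {
    console.log('No saved session found yet');
  }
};

demo();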
Zip and unzip helper
// zip-directory.js
import AdmZip from 'adm-zip';
const zipDirectory = async (sourceDir, outputFilePath) => {
const zip = new AdmZip();
zip.addLocalFolder(sourceDir);
await zip.writeZipPromise(outputFilePath);
console.log(`Zip file created: ${outputFilePath}`);
};
const unzipDirectory = async (inputFilePath, outputDirectory) => {
console.log({ inputFilePath, outputDirectory });
const zip = new AdmZip(inputFilePath);
return new Promise((resolve, reject) => {
zip.extractAllToAsync(outputDirectory, true, (error) => {
if (error) {
console.log(error);
reject(error);
} else {
console.log(`Extracted to "${outputDirectory}" successfully`);
resolve();
}
});
});
};
export { zipDirectory, unzipDirectory };
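A quick round-trip test for these helpers might look like this; the source directory path is a placeholder, so point it at any directory that exists on your machine.
// zip-usage.js (sketch)
import { temporaryDirectory, temporaryFile } from 'tempy';
import { zipDirectory, unzipDirectory } from './zip-directory';

const demo = async () => {
  const zipPath = temporaryFile({ extension: 'zip' });
  await zipDirectory('/tmp/some-user-data-dir', zipPath); // placeholder directory
  await unzipDirectory(zipPath, temporaryDirectory());
};

demo();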
Puppeteer Helper
// get-browser.js
import puppeteer, { executablePath } from "puppeteer";
import { temporaryDirectory, temporaryFile } from "tempy";
import { unzipDirectory } from "./zip-directory";
import { checkFileExists, downloadFile } from "./storage";
export const getBrowser = async ({
bucketName,
zipFilePath,
userDataDirPath = temporaryDirectory(),
}) => {
const launchConfig = {
headless: true,
userDataDir: userDataDirPath,
args: [
"--no-sandbox", // Add this flag to disable the sandbox
"--disable-setuid-sandbox",
],
executablePath: executablePath(),
};
console.log(launchConfig);
if (!zipFilePath) {
return puppeteer.launch(launchConfig);
}
// Check if the userDataDir directory exists in Supabase Storage
let userDataDirExistsInStorage = await checkFileExists(
bucketName,
zipFilePath
);
if (userDataDirExistsInStorage) {
try {
console.log(`Download the userDataDir.zip`);
// Download the userDataDir.zip file from Supabase Storage to a local file system
const downloadFilePath = temporaryFile({ extension: "zip" });
console.log({ downloadFilePath });
await downloadFile(bucketName, zipFilePath, downloadFilePath);
await unzipDirectory(downloadFilePath, userDataDirPath);
console.log(`End: Download the userDataDir.zip`);
return puppeteer.launch(launchConfig);
} catch (error) {
console.log(error);
}
}
return puppeteer.launch({ ...launchConfig });
};
Actual Puppeteer Script
import { zipDirectory } from "./zip-directory";
import { getBrowser } from "./get-browser";
import { temporaryFile, temporaryDirectory } from "tempy";
import { uploadFile } from "./storage";
import fs from "fs";
const exampleWorkflow = async () => {
  let browser;
  try {
const bucketName = "example-browser-data";
const zipFilePath = "example-app.zip";
const userDataDirPath = temporaryDirectory();
    browser = await getBrowser({
bucketName,
zipFilePath,
userDataDirPath,
});
const page = await browser.newPage();
// Do something
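    // For example (hypothetical, tying back to the earlier sketches): check whether the
    // restored session is still valid and only log in when it isn't, then navigate,
    // wait for the API response you care about, and save the data.
    // if (!(await isLoggedIn(page))) {
    //   await logIn(page); // your app-specific login steps
    // }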
await browser?.close?.();
// zip and upload the file
const uploadFilePath = temporaryFile({ extension: "zip" });
console.log(`start: Zip the userDataDir directory to a local file system`);
// Zip the userDataDir directory to a local file system
await zipDirectory(userDataDirPath, uploadFilePath);
console.log(`zip done`);
// Upload the userDataDir.zip file to Supabase Storage
console.log("start: file upload");
const userDataDirZipFile = fs.createReadStream(uploadFilePath);
await uploadFile(bucketName, zipFilePath, userDataDirZipFile, {
contentType: "application/zip",
upsert: true,
});
console.log("end: file upload");
console.log(`End: Zip the userDataDir directory to a local file system`);
    await fs.promises.rm(uploadFilePath, { recursive: true, force: true });
console.log("File deleted!");
await fs.promises.rm(userDataDirPath, { recursive: true, force: true });
console.log("User directory deleted!");
return "done";
} catch (error) {
console.log(error);
throw error;
} finally {
await browser?.close?.();
}
};
exampleWorkflow();
Happy fewer logins!