# Fetch the prebuilt sharp / libvips binaries from the npmmirror.com mirror.
# The former npm.taobao.org host has been retired and replaced by npmmirror.com.
npm config set sharp_binary_host "https://npmmirror.com/mirrors/sharp"
npm config set sharp_libvips_binary_host "https://npmmirror.com/mirrors/sharp-libvips"
代码示例:
import fs from 'fs';
import http from 'http';
import https from 'https';
import path from 'path';
import sharp from 'sharp';
/**
 * Downloads the resource at `imageUrl` and resolves with its complete body.
 *
 * The transport is chosen from the URL prefix: the `https` module for URLs
 * starting with "https", the `http` module for everything else.
 *
 * @param imageUrl - Absolute http(s) URL of the image to fetch.
 * @returns A promise that resolves to the whole response body as one Buffer.
 * @throws The promise rejects when the URL is invalid, when the request or
 *   the response stream emits an error, when the server answers with a
 *   non-2xx status code, or when the connection ends before the full body
 *   has been received.
 */
const downloadImage = (imageUrl: string): Promise<Buffer> => {
  return new Promise<Buffer>((resolve, reject) => {
    const client = imageUrl.startsWith('https') ? https : http;

    const request = client.get(imageUrl, (res) => {
      const status = res.statusCode ?? 0;
      if (status < 200 || status >= 300) {
        // Drain the body so the socket is released, then fail with context.
        res.resume();
        reject(new Error(`Request failed with status ${status}: ${imageUrl}`));
        return;
      }

      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer) => chunks.push(chunk));
      // 'end' never carries an error; stream failures arrive via 'error'.
      res.on('error', reject);
      res.on('end', () => {
        if (!res.complete) {
          // The connection was closed before the whole message arrived.
          reject(new Error(`Response ended prematurely: ${imageUrl}`));
          return;
        }
        // Buffer.concat computes the total length from the chunks itself.
        resolve(Buffer.concat(chunks));
      });
    });

    // Without this listener a DNS/connection failure is thrown as an
    // unhandled 'error' event instead of rejecting the promise.
    request.on('error', reject);
  });
};
/**
 * Builds the captcha image URL, using the current `Date.now()` timestamp as
 * the value of the `_pc` query parameter (presumably a cache-buster).
 */
const getCodeImgUrl = () => {
  const timestamp = Date.now();
  return 'https://lzw.me/getcaptcha?theme=light&_pc=' + timestamp;
};
/**
 * Downloads captcha images one after another and stores each of them as a
 * PNG file named `<nameBase>.png`, `<nameBase + 1>.png`, ... inside `baseDir`.
 *
 * Before every download the number of images still pending is logged, and a
 * random pause of 300-1000 ms is inserted between consecutive downloads.
 *
 * @param total - Number of images to download. Fractions are rounded down;
 *   zero, negative and non-finite values download nothing.
 * @param nameBase - Number used as the file name of the first image; it is
 *   incremented by one for every further image.
 * @param baseDir - Target directory; created (with parents) when missing.
 * @returns A promise that resolves once every image has been written.
 * @throws The promise rejects when the directory cannot be created or when a
 *   download or PNG conversion fails.
 */
export const saveImages = async (total = 100, nameBase = 10000, baseDir = './images/captcha/'): Promise<void> => {
  // With `recursive: true` an already existing directory is not an error.
  fs.mkdirSync(baseDir, { recursive: true });

  // Normalise the count up front: a `--total` truthiness check never reaches
  // zero for negative or fractional input and would loop forever.
  const count = Number.isFinite(total) ? Math.max(0, Math.floor(total)) : 0;

  for (let index = 0; index < count; index++) {
    console.log('等待下载的图片数量:', count - index);

    const buf = await downloadImage(getCodeImgUrl());
    await sharp(buf).png().toFile(path.resolve(baseDir, `${nameBase + index}.png`));

    // Pause between requests only; waiting after the last image is pointless.
    if (index < count - 1) {
      const pauseMs = Math.ceil(Math.random() * 700 + 300);
      await new Promise<void>((done) => setTimeout(done, pauseMs));
    }
  }
};
// Example run: fetch 10 captcha images named 100.png ... 109.png into
// ./images/captcha_1/. The rejection is handled here so that a failed download
// is reported with context and reflected in the process exit code instead of
// surfacing as an unhandled promise rejection.
saveImages(10, 100, './images/captcha_1/').catch((error: unknown) => {
  console.error('验证码图片下载失败:', error);
  process.exitCode = 1;
});
然后可以人工识别,将每张验证码图片重命名为其对应的验证码值,以便用于后续的训练;有重名的可以加上后缀,如 -1。
除此之外,还有 opencv:
(如果你们有什么想法,或者发现文中有错误的地方,可以在评论区留言)
- https://www.npmjs.com/package/opencv
- https://www.npmjs.com/package/opencv-wasm
相关参考
- https://github.com/tesseract-ocr/tesseract
- https://github.com/naptha/tesseract.js/
- https://github.com/emscripten-core/emscripten
- https://github.com/UB-Mannheim/tesseract/wiki
- https://www.npmjs.com/package/captcha-cv-ocr