Picture download of node crawler

Posted by Ryan0r on Thu, 23 Apr 2020 17:04:00 +0200

Background: some players want to change their avatars but don't know which avatars to use. As a code enthusiast, if a problem can be solved with a program, I would rather not do it by hand, so I simply went ahead and wrote one, and then tidied it up here.

design sketch

Environment configuration

  • Install node environment
  • node -v
  • The Node version should preferably be 8.11.1 or newer

Project structure

assets is to store the downloaded pictures

static holds the static resource page

eg.js is an example of downloading a picture (run it with node eg.js)

img.json is the JSON data obtained from the web page

index.js is the server

Installation dependency

npm init ( Will generate a package.json)

npm i express --save-dev

npm i cheerio --save-dev

npm i superagent --save-dev

npm i superagent-charset --save-dev

npm i request --save-dev

Code area

1. eg.js

// eg.js: download one remote picture into ./assets (run with `node eg.js`).
var fs = require('fs');
var request = require("request");
var path = require('path');

var src = "https://pic.qqtn.com/up/2019-6/2019061811092772406.jpg";

// createWriteStream does not create missing directories, so make sure the
// target directory exists before opening the file.
if (!fs.existsSync('./assets')) {
    fs.mkdirSync('./assets');
}

// Keep the extension of the source picture instead of hard-coding ".png",
// so the saved file name matches its real format.
var writeStream = fs.createWriteStream('./assets/aa' + path.extname(src));
var readStream = request(src);
readStream.pipe(writeStream);

readStream.on('end', function () {
    console.log('File downloaded successfully');
});
// The handler must declare `err`; the previous version referenced an
// undefined variable and threw a ReferenceError while reporting the error.
readStream.on('error', function (err) {
    console.log("error message:" + err);
    // pipe() does not close the destination when the source fails.
    writeStream.end();
});
// Without this listener a failed write (for example a permission problem)
// would surface as an unhandled 'error' event and crash the script.
writeStream.on('error', function (err) {
    console.log("error message:" + err);
});
// 'finish' fires after the stream has already ended, so nothing has to be
// closed here.
writeStream.on("finish", function () {
    console.log("File written successfully");
});

2. index.js

var superagent = require('superagent');
var charset = require('superagent-charset');
charset(superagent);
var express = require('express');
var baseUrl = 'https://www.qqtn.com/';
const cheerio = require('cheerio');
var request = require("request");
var fs = require('fs')
var path = require('path')

// Create the Express application and let it serve the files under ./static.
var app = express();
var staticFiles = express.static('static');
app.use(staticFiles);

// Snapshot, taken once at startup, of whether the "assets" directory exists;
// it is not refreshed afterwards.
var checkDir = fs.existsSync("assets");
/**
 * GET /index?type=<category>&page=<n>
 *
 * Scrapes one avatar list page from baseUrl, remembers the result in
 * img.json (one entry per route), answers with the scraped items and then
 * downloads every remembered thumbnail into ./assets.
 *
 * Response body: { code: 200, msg: "", data: items } on success,
 *                { code: 400, msg: <text>, sets: [] } on failure.
 */
app.get('/index', function (req, res) {
    // CORS headers. Both allowed request headers go into ONE header value:
    // setting the header twice would keep only the last value.
    res.header("Access-Control-Allow-Origin", "*");
    res.header('Access-Control-Allow-Methods', 'PUT, GET, POST, DELETE, OPTIONS');
    res.header('Access-Control-Allow-Headers', 'X-Requested-With, Content-Type');

    console.log(req.query, 'type');
    // Category and page number, with the same defaults as before.
    const type = String(req.query.type || 'weixin');
    const page = String(req.query.page || '1');
    // Both values end up inside the upstream URL, so only accept simple
    // tokens; anything else could point the crawler at an arbitrary path.
    if (!/^[\w-]+$/.test(type) || !/^\d+$/.test(page)) {
        res.json({ code: 400, msg: 'Invalid type or page', sets: [] });
        return;
    }
    const route = `tx/${type}tx_${page}.html`;

    // The target site is encoded as gb2312, hence .charset('gb2312');
    // for an ordinary utf-8 page use .charset('utf-8') instead.
    superagent.get(baseUrl + route)
        .charset('gb2312')
        .end(function (err, sres) {
            if (err) {
                console.log('ERR: ' + err);
                // Send a readable message rather than the raw error object.
                res.json({ code: 400, msg: err.message || String(err), sets: [] });
                return;
            }

            const $ = cheerio.load(sres.text);
            const items = [];
            $('div.g-main-bg ul.g-gxlist-imgbox li a').each(function (idx, element) {
                const $element = $(element);
                items.push({
                    title: $element.attr('title'),
                    href: $element.attr('href'),
                    thumbSrc: $element.find('img').attr('src')
                });
            });

            // Strict ordering: directory -> img.json -> response -> downloads.
            // The client requests /show right after this response, so img.json
            // must be on disk before the response is sent.
            ensureAssetsDir(function (dirErr) {
                if (dirErr) {
                    console.log(dirErr);
                }
                saveRouteItems(route, items, function (saveErr, pages) {
                    if (saveErr) {
                        console.log(saveErr);
                    }
                    // Scraping succeeded, so the items are returned even when
                    // persisting them failed (the failure is logged above).
                    res.json({ code: 200, msg: "", data: items });
                    if (!dirErr) {
                        downloadThumbnails(pages);
                    }
                });
            });
        });
});

/**
 * Create the "assets" directory unless it already exists.
 * The check happens on every call, because the directory may have been
 * removed or created since the server started.
 * @param {function(Error=)} callback - receives an error only for real failures.
 */
function ensureAssetsDir(callback) {
    fs.mkdir('assets', function (error) {
        if (error && error.code !== 'EEXIST') {
            callback(error);
            return;
        }
        if (!error) {
            console.log('Directory created successfully');
        }
        callback(null);
    });
}

/**
 * Read the page list stored in img.json.
 * @param {function(Error, Array)} callback - a missing file yields an empty
 *   list; unreadable or malformed content yields an error and an empty list.
 */
function readSavedPages(callback) {
    fs.readFile(path.join(__dirname, 'img.json'), function (err, data) {
        if (err) {
            callback(err.code === 'ENOENT' ? null : err, []);
            return;
        }
        let pages;
        try {
            pages = JSON.parse(data.toString());
        } catch (parseErr) {
            callback(parseErr, []);
            return;
        }
        if (!Array.isArray(pages)) {
            callback(new Error('img.json does not contain an array'), []);
            return;
        }
        callback(null, pages);
    });
}

/**
 * Add { route, items } to img.json unless that route is already stored.
 * @param {string} route - scraped route, used as the key of the entry.
 * @param {Array} items - scraped items for that route.
 * @param {function(Error, Array)} callback - receives the page list that is
 *   on disk once the operation has finished.
 */
function saveRouteItems(route, items, callback) {
    readSavedPages(function (readErr, pages) {
        if (readErr) {
            // Do not overwrite a file that could not be understood.
            callback(readErr, pages);
            return;
        }
        const exist = pages.some(function (page) {
            return Boolean(page) && page.route === route;
        });
        if (exist) {
            callback(null, pages);
            return;
        }
        const updated = pages.concat([{ route: route, items: items }]);
        fs.writeFile(path.join(__dirname, 'img.json'), JSON.stringify(updated), function (writeErr) {
            if (writeErr) {
                callback(writeErr, pages);
                return;
            }
            console.log('Saved successfully');
            callback(null, updated);
        });
    });
}

/**
 * Turn a scraped title into a file name that cannot leave the assets
 * directory: path separators and other reserved characters become "_".
 * @param {string|undefined} title
 * @returns {string}
 */
function safeFileName(title) {
    return String(title || 'untitled').replace(/[\\/:*?"<>|\x00-\x1f]/g, '_');
}

/**
 * Download the thumbnail of every item of every page into ./assets as
 * "<title>.jpg". A failing download is logged and skipped; it never takes
 * the server down.
 * @param {Array} pages - entries in the img.json format.
 */
function downloadThumbnails(pages) {
    pages.forEach(function (page) {
        const items = (page && page.items) || [];
        items.forEach(function (item) {
            if (!item || !item.thumbSrc) {
                return;
            }
            let download;
            try {
                // NOTE(review): request() appears to throw synchronously for an
                // unusable URL, before a listener can be attached; confirm.
                download = request(item.thumbSrc);
            } catch (requestErr) {
                console.log(requestErr);
                return;
            }
            const target = fs.createWriteStream(path.join('assets', safeFileName(item.title) + '.jpg'));
            download.on('error', function (downloadErr) {
                console.log(downloadErr);
                // pipe() does not close the destination when the source fails.
                target.end();
            });
            target.on('error', function (writeErr) {
                console.log(writeErr);
            });
            download.pipe(target);
        });
    });
}
/**
 * GET /show
 *
 * Returns the content of img.json as a JSON-encoded string, so the client
 * has to JSON.parse the decoded value once more (the static page does so).
 */
app.get('/show', (req, res) => {
    fs.readFile(path.join(__dirname, 'img.json'), (err, data) => {
        if (err) {
            console.log(err)
            // Always answer: previously the request was left hanging on error.
            if (err.code === 'ENOENT') {
                // Nothing has been scraped yet; report an empty page list in
                // the same string-encoded format as the success case.
                res.json('[]')
                return
            }
            res.status(500).json({ code: 500, msg: err.message })
            return
        }
        res.json(data.toString())
    })
})
// Start the HTTP server on port 8081 and report the port once it is bound.
// The port was previously read into an unused local and never shown.
var server = app.listen(8081, function () {
    var port = server.address().port
    console.log('Server listening on port ' + port)
})
View Code

3. index.html under the static folder

<!DOCTYPE html>
<!-- Host page for the demo: it has no markup of its own and only loads ./index.js. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>Document</title>
</head>
<body>
  <!-- The script is placed inside <body>; it replaces the body content with the fetched pictures. -->
  <script src="./index.js"></script>
</body>
</html>

4. index.js under the static folder

/**
 * Resolve a scraped picture address and accept it only when it is an
 * http(s) URL; anything else (for example a "javascript:" address) is dropped.
 * @param {string} src - address taken from the server data.
 * @returns {string|null} absolute URL, or null when it must not be used.
 */
function toHttpUrl(src) {
  try {
    const url = new URL(src, window.location.href);
    return url.protocol === 'http:' || url.protocol === 'https:' ? url.href : null;
  } catch (err) {
    return null;
  }
}

/**
 * Replace the page body with one clickable 200x200 thumbnail per item.
 * Elements are built through the DOM API rather than an HTML string, so
 * scraped text can never be interpreted as markup.
 * @param {Array} pages - entries of the form { route, items: [{ thumbSrc }] }.
 */
function renderPages(pages) {
  const fragment = document.createDocumentFragment();
  pages.forEach((page) => {
    const items = (page && page.items) || [];
    items.forEach((item) => {
      const url = toHttpUrl(item.thumbSrc);
      if (url === null) {
        return;
      }
      const link = document.createElement('a');
      link.href = url;
      const img = document.createElement('img');
      img.src = url;
      img.width = 200;
      img.height = 200;
      link.appendChild(img);
      fragment.appendChild(link);
    });
  });
  // Same effect as assigning innerHTML: the previous body content is dropped.
  document.body.textContent = '';
  document.body.appendChild(fragment);
}

// First ask the server to scrape a page (/index), then load everything it
// has stored so far (/show) and render it.
fetch('/index', { method: 'GET' })
  .then((res) => res.json())
  .then((result) => {
    if (Number(result.code) !== 200) {
      throw new Error(`/index failed with code ${result.code}: ${result.msg}`);
    }
    return fetch('/show', { method: 'GET' });
  })
  .then((res) => res.json())
  .then((body) => {
    // /show answers with a JSON-encoded string, so decode it a second time.
    const pages = typeof body === 'string' ? JSON.parse(body) : body;
    if (!Array.isArray(pages)) {
      throw new Error('/show did not return a list of pages');
    }
    console.log(pages, pages.length);
    renderPages(pages);
  })
  .catch((err) => {
    // Report failures instead of leaving an unhandled promise rejection.
    console.error('Failed to load pictures:', err);
  });

summary

That's about it. I'm still in awe of Node, and I've finally finished writing this demo. The project has also been pushed to GitHub; if you need it, you can send me a private message.

Topics: JSON npm JQuery IE