How to view all of a user's comments on NetEase Cloud Music

Posted by truck7758 on Sat, 12 Oct 2019 16:10:38 +0200

When you want to see a user's comments but find that they are set to be visible only to the account owner and cannot be seen by others, you can still retrieve them by writing a Python program. If you need help with a lookup, you can add me (w-x: fas1024). Below is a worked example.

We can see that these comments are loaded by a request to
music.163.com/weapi/v1/resource/comments/R_SO_4_26075485?csrf_token=
The request is a POST, and it carries two parameters: params and encSecKey.


That is to say, we can get the comments just by sending POST requests to the NetEase Cloud Music server from a simulated browser!
Note also that the string of digits following R_SO_4_ in the request URL is the id of the song, and that the two parameters passed in need careful analysis as well (covered later).
First step

The code is as follows:
# Default request headers: identify the crawler as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.87 Safari/537.36'
    ),
}

# Site root; the relative hrefs scraped from listing pages are appended to it.
baseUrl = 'https://music.163.com'
def getHtml(url, timeout=10):
    """Download *url* with the module-level browser headers and return its text.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` blocks indefinitely on an unresponsive host.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

def getUrl():
    """Scrape the newest-playlists listing and crawl the first playlist found.

    Only the first listing page is fetched, and the loop deliberately stops
    after the first playlist (see the ``break`` below).

    Returns:
        A list of dicts, one per processed playlist, with the keys ``title``,
        ``url``, ``author`` and ``authorUrl``.
    """
    # Start with the latest song list
    startUrl = 'https://music.163.com/discover/playlist/?order=new'
    html = getHtml(startUrl)
    # Captures, per <li>: playlist title, playlist href, author name, author href.
    pattern = re.compile('<li>.*?<p.*?class="dec">.*?<.*?title="(.*?)".*?href="(.*?)".*?>.*?span class="s-fc4".*?title="(.*?)".*?href="(.*?)".*?</li>', re.S)

    info = []
    for title, href, author, authorHref in pattern.findall(html):
        url = baseUrl + href
        print(url)
        info.append({
            'title': title,
            'url': url,
            'author': author,
            'authorUrl': baseUrl + authorHref,
        })
        getSongSheet(url)
        # Random pause between requests to avoid hammering the server.
        time.sleep(random.randint(1, 10))
        # Only the first playlist is crawled; remove this to walk the whole page.
        break
    return info
This is also an interesting quirk of NetEase Cloud Music: when crawling, we need to remove the # from the URL so that the page content can be seen.
![](https://upload-images.jianshu.io/upload_images/7933544-ba9a4003bde734ac?imageMogr2/auto-orient/strip|imageView2/2/w/951/format/webp)
**The second step**
    def getSongSheet(url):
        """Crawl one playlist page and store the comments of its first song.

        For the song processed, a first POST to the comments endpoint reads the
        total comment count; the page count is derived from it at 20 comments
        per page, then every page is fetched via getMusicComments and the
        result is handed to saveToMongoDB.

        Args:
            url: Absolute URL of the playlist page.

        Returns:
            A list of dicts (``musicUrl``, ``title``), one per processed song.
        """
        # Get the id of each song in each song list as the key to the next post
        html = getHtml(url)
        songs = re.findall(r'<li><a.*?href="/song\?id=(.*?)">(.*?)</a></li>', html, re.S)
        # The last match is discarded, as before (presumably not a real track
        # entry - TODO confirm). Slicing, unlike pop(), tolerates zero matches.
        songs = songs[:-1]

        musicList = []
        for songId, songTitle in songs:
            headers1 = {
                'Referer': 'https://music.163.com/song?id={}'.format(songId),
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            }
            musicUrl = baseUrl + '/song?id=' + songId
            print(musicUrl)
            # Song url and song name
            musicList.append({'musicUrl': musicUrl, 'title': songTitle})

            postUrl = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token='.format(songId)
            param = {
                'params': get_params(1),
                'encSecKey': get_encSecKey()
            }
            r = requests.post(postUrl, data=param, headers=headers1, timeout=10)
            # Total comment
            total = int(r.json()['total'])
            # Total base pages; floor division keeps this an int on Python 3 too,
            # which range() in getMusicComments requires.
            basePages = total // 20
            print(basePages)
            # A non-zero remainder means one more, partially filled, page.
            comment_TatalPage = basePages + 1 if total % 20 != 0 else basePages

            comment_data, hotComment_data = getMusicComments(comment_TatalPage, postUrl, headers1)
            # If ID duplication occurs when storing in the database, pay attention
            # to whether there is only one data crawling down.
            # NOTE(review): on Python 2, str() of a non-ASCII unicode title raises
            # UnicodeEncodeError - confirm what saveToMongoDB expects.
            saveToMongoDB(str(songTitle), comment_data, hotComment_data)
            print('End!')

            # Random pause between requests to avoid hammering the server.
            time.sleep(random.randint(1, 10))
            # Only the first song is crawled; remove this to walk the whole playlist.
            break
        return musicList
Using the song id, we construct postUrl and POST to the first page to get the total number of comments and, from it, the total number of pages (how to build the parameters for the POST is discussed later);

then we call the function that fetches the song's comments.
The third step
def _pick_comment_fields(raw_comments):
    """Reduce raw comment objects to their content, author nickname and like count."""
    return [
        {
            'content': item['content'],
            'author': item['user']['nickname'],
            'likedCount': item['likedCount'],
        }
        for item in raw_comments
    ]


def getMusicComments(comment_TatalPage, postUrl, headers1):
    """Fetch every comment page of one song and collect regular and hot comments.

    Args:
        comment_TatalPage: Number of comment pages to request (pages 1..N).
        postUrl: Comments endpoint URL for the song.
        headers1: Request headers forwarded to getPostApi.

    Returns:
        A ``(commentinfo, hotcommentinfo)`` tuple of lists. Each element is a
        dict with the keys ``content``, ``author`` and ``likedCount``.
    """
    commentinfo = []
    hotcommentinfo = []
    # Comments on each page
    for j in range(1, comment_TatalPage + 1):
        # Decode the response once and reuse it for both comment lists.
        payload = getPostApi(j, postUrl, headers1).json()
        commentinfo.extend(_pick_comment_fields(payload['comments']))
        # Hot reviews are available only on the first page
        if j == 1:
            hotcommentinfo.extend(_pick_comment_fields(payload['hotComments']))
        print(u'The first' + str(j) + u'Page crawl finished...')
        # Random pause between pages to avoid hammering the server.
        time.sleep(random.randint(1, 10))
    print(commentinfo)
    print('\n-----------------------------------------------------------\n')
    print(hotcommentinfo)
    return commentinfo, hotcommentinfo

   

Topics: Python JSON Windows Database