使用Java Xpath 爬取某易云歌曲
阅读原文 | 时间:2023年08月25日 | 阅读:1

本文使用Java xpath 爬取某易云歌曲,并下载至本地。

代码仅用于个人学习使用,欢迎各位大佬提出建议。

1、添加依赖

        <!-- XPath queries over Jsoup-parsed HTML (JXDocument / JXNode). -->
        <dependency>
            <groupId>cn.wanghaomiao</groupId>
            <artifactId>JsoupXpath</artifactId>
            <version>2.2</version>
        </dependency>
        <!-- HTTP client, file helpers and console logging (HttpUtil, FileUtil, Console, StrUtil). -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.9</version>
        </dependency>
        <!-- JSON parsing of the lyric API response; the code imports com.alibaba.fastjson.JSONObject. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>

2、获取音乐id和url

    /**
     * Fetches a playlist or artist page and extracts the songs listed on it.
     *
     * <p>The title is the playlist name when the page has one, otherwise the artist name.
     * The title and every extracted song are printed to standard output.
     *
     * @param url page URL; any "/#" segment is removed before the request is sent
     * @return a map with two entries: "title" (String) and "songs" (a list of maps, each
     *         holding "songId", "songName", "songUrl" and "title")
     */
    public Map<String, Object> getMusicInfo(String url) {
        url = url.replace("/#", "");
        // HttpUtil.get(url, map) sends the map as query parameters, not as HTTP headers,
        // so the headers are set explicitly on the request. The Host header is left to
        // the HTTP client, which derives it from the URL.
        String res = HttpUtil.createGet(url)
                .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36")
                .header("Referer", "https://music.163.com/")
                .execute()
                .body();

        JXDocument jxDocument = JXDocument.create(res);
        // Song links
        List<JXNode> songs = jxDocument.selN("//ul[@class=\"f-hide\"]/li/a");
        // Playlist name
        JXNode jxsonglistName = jxDocument.selNOne("//h2[contains(@class,\"f-ff2\")]/text()");
        // Artist name
        JXNode jxsingerName = jxDocument.selNOne("//h2[@id=\"artist-name\"]/text()");
        String songlistName = null != jxsonglistName ? jxsonglistName.toString() : "";
        String singerName = null != jxsingerName ? jxsingerName.toString() : "";
        String title = StrUtil.isBlank(songlistName) ? singerName : songlistName;

        System.out.println(String.format("=======================%s=======================", title));

        Map<String, Object> result = new HashMap<>();
        result.put("title", title);
        List<Map<String, Object>> musics = new ArrayList<>();
        for (JXNode song : songs) {
            Element element = song.asElement();
            String href = element.attr("href");
            int idStart = href.indexOf('=') + 1;
            if (idStart <= 0 || idStart >= href.length()) {
                // A link without "=<id>" has no song id to extract; skip it rather than fail.
                continue;
            }
            String songId = href.substring(idStart);
            Map<String, Object> map = new HashMap<>();
            map.put("songId", songId);
            map.put("songName", element.text());
            map.put("songUrl", OUT_LINK + songId);
            map.put("title", title);
            //map.put("lyric", getMusicLyric(songId));
            musics.add(map);
            // Single-threaded download
            //downloadSong(element.text(), OUT_LINK + songId, title);
        }
        musics.forEach(System.out::println);

        // Multi-threaded download
        //multiDownload(musics);
        result.put("songs", musics);
        return result;
    }

3、获取歌词

    /**
     * Fetches the lyrics of a song from the lyric API.
     *
     * @param songId id of the song
     * @return the lyric text, or an empty string when the response carries no lyrics
     */
    public String getMusicLyric(String songId) {
        String url = String.format("http://music.163.com/api/song/lyric?id=%s&lv=-1&kv=-1&tv=-1", songId);
        // HttpUtil.get(url, map) sends the map as query parameters, not as HTTP headers,
        // so the headers are set explicitly on the request. The Host header is left to
        // the HTTP client, which derives it from the URL.
        String res = HttpUtil.createGet(url)
                .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36")
                .header("Referer", "https://music.163.com/")
                .execute()
                .body();
        JSONObject body = JSONObject.parseObject(res);
        // The "lrc" object may be absent from the response; guard instead of throwing.
        JSONObject lrc = body == null ? null : body.getJSONObject("lrc");
        String lyric = lrc == null ? null : lrc.getString("lyric");
        return lyric == null ? "" : lyric;
    }

4、完整代码

以下完整代码在前两步的基础上,加入了单线程和多线程下载歌曲的实现。

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.nodes.Element;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class Music163 {
    //下载地址
    private static String OUT_LINK = "http://music.163.com/song/media/outer/url?id=";
    //本地下载目录
    private static String DOWNLOAD_PATH = "E:\\music\\";

    public static void main(String[] args) {
        String musicUrl;
        //歌曲清单
        // 热歌 3778678 原创 2884035  新歌 3779629 飙升 19723756
        musicUrl = "https://music.163.com/#/playlist?id=3778678";
        // 歌手歌曲榜单  8325->梁静茹
        //musicUrl = "https://music.163.com/#/artist?id=8325";
        //搜索列表
        // musicUrl = "https://music.163.com/#/search/m/?order=hot&cat=全部&limit=435&offset=435&s=梁静茹";
        Music163 music163 = new Music163();
        music163.getMusicInfo(musicUrl);
    }

    /**
     * 获取歌曲信息
     *
     * @param url
     * @return
     */
    public Map<String, Object> getMusicInfo(String url) {
        Map<String, Object> result = new HashMap<>();
        url = url.replace("/#", "");
        Map<String, Object> header = new HashMap<>();
        header.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36");
        header.put("Referer", "https://music.163.com/");
        header.put("Host", "music.163.com");
        String res = HttpUtil.get(url, header);

        JXDocument jxDocument = JXDocument.create(res);
        //歌曲列表
        List<JXNode> songs = jxDocument.selN("//ul[@class=\"f-hide\"]/li/a");
        //歌单名称
        JXNode jxsonglistName = jxDocument.selNOne("//h2[contains(@class,\"f-ff2\")]/text()");
        //歌手名
        JXNode jxsingerName = jxDocument.selNOne("//h2[@id=\"artist-name\"]/text()");
        String songlistName = null != jxsonglistName ? jxsonglistName.toString() : "";
        String singerName = null != jxsingerName ? jxsingerName.toString() : "";

        System.out.println(String.format("=======================%s=======================", StrUtil.isBlank(songlistName) ? singerName : songlistName));
        List<Map<String, Object>> musics = new ArrayList<>();
        result.put("title", StrUtil.isBlank(songlistName) ? singerName : songlistName);
        for (JXNode song : songs) {
            Element element = song.asElement();
            String songName = element.text();
            String songId = element.attr("href").split("=")[1];
            String songUrl = OUT_LINK + songId;
            Map<String, Object> map = new HashMap<>();
            map.put("songId", songId);
            map.put("songName", songName);
            map.put("songUrl", songUrl);
            map.put("title", StrUtil.isBlank(songlistName) ? singerName : songlistName);
            //map.put("lyric", getMusicLyric(songId));
            musics.add(map);
            //单线程下载歌曲
            //downloadSong(songName, songUrl, result.get("title").toString());
        }
        musics.forEach(x -> System.out.println(x));

        //多线程下载歌曲
        //multiDownload(musics);
        result.put("songs", musics);
        //System.out.println(result);
        return result;
    }

    /**
     * 获取歌词
     *
     * @param songId
     * @return
     */
    public String getMusicLyric(String songId) {
        String url = String.format("http://music.163.com/api/song/lyric?id=%s&lv=-1&kv=-1&tv=-1", songId);
        Map<String, Object> header = new HashMap<>();
        header.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36");
        header.put("Referer", "https://music.163.com/");
        header.put("Host", "music.163.com");
        String res = HttpUtil.get(url, header);
        return JSONObject.parseObject(res).getJSONObject("lrc").getString("lyric");
    }

    /**
     * 歌曲下载
     *
     * @param songName
     * @param songUrl
     * @param title
     */
    public void downloadSong(String songName, String songUrl, String title) {
        HttpUtil.downloadFile(songUrl,
                FileUtil.file(DOWNLOAD_PATH + title + "\\", songName + ".mp3"));
        Console.log("下载完成!" + "==》" + songName);
    }

    /**
     * 多线程下载
     *
     * @param list
     */
    public void multiDownload(List<Map<String, Object>> list){
        //使用多线程优化查询速度
        int threadNum = 10;
        if (list.size() < 10) {
            threadNum = 1;
        }
        ExecutorService executorService = Executors.newFixedThreadPool(threadNum);
        CountDownLatch countDownLatch = new CountDownLatch(threadNum);

        int perSize = list.size() / threadNum;
        for (int i = 0; i < threadNum; i++) {
            int start = i * perSize;
            int end = (i + 1) * perSize;
            if (i == threadNum - 1) {
                end = list.size();
            }
            List<Map<String, Object>> maps = list.subList(start, end);
            MultiThread thread = new MultiThread();
            thread.setProjectList(maps);
            thread.setCountDownLatch(countDownLatch);
            executorService.submit(thread);
        }
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        executorService.shutdown();
    }

    class MultiThread extends Thread {
        private List<Map<String, Object>> projectList;

        private CountDownLatch countDownLatch;

        private List<Map<String, Object>> result;

        public void setResultList(List<Map<String, Object>> result) {
            this.result = result;
        }

        public void setProjectList(List<Map<String, Object>> projectList) {
            this.projectList = projectList;
        }

        public void setCountDownLatch(CountDownLatch countDownLatch) {
            this.countDownLatch = countDownLatch;
        }

        @Override
        public void run() {
            try {

                for (Map<String, Object> map : projectList) {
                    downloadSong((String)map.get("songName"),(String)map.get("songUrl"),(String)map.get("title"));
                }

            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (countDownLatch != null) {
                    countDownLatch.countDown();
                }
            }
        }
    }
}

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章