본문 바로가기

Java

[자바 알고리즘] 크롤링

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package com.wego.web.test;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Crawler {
    public static void main(String[] args) {
       try {
           Document rawData =  Jsoup.connect("https://music.bugs.co.kr/chart").timeout(10*1000).get();
             Elements artist =  rawData.select("p[class=artist]");
             Elements title =  rawData.select("p[class=title]");
             List<String> artist2 = new  ArrayList<>();
             List<String> title2 = new  ArrayList<>();
             for(Element e : artist) {
                 artist2.add(e.text());
             }
             for(Element e : title) {
                 title2.add(e.text());
             }
             System.out.println(artist2);
              System.out.println("---------------");
             System.out.println(title2);
       } catch (Exception e2) {
           // TODO Auto-generated catch  block
           e2.printStackTrace();
       }
    }
}
cs



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package com.wego.web.test;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Crawler2 {
    public static void main(String[] args) {
       try {
           Document rawData =  Jsoup.connect("https://music.bugs.co.kr/recomreview?&order=listorder&page=2").timeout(10*1000).get();
             Elements artist =  rawData.select("aside[class=recommendReview]  p");
             List<String> artist2 = new  ArrayList<>();
             for(Element e : artist) {
                 artist2.add(e.text());
             }
             System.out.println(artist2);
       } catch (Exception e2) {
           // TODO Auto-generated catch  block
           e2.printStackTrace();
       }
    }
}
cs