본문 바로가기
자바 풀스택 공부

Day 56. [JSP/Servlet/JavaScript] CORS, 자바로 웹 크롤링, 도로명주소 API

by seung_nari 2022. 3. 25.

CORS

 

https://evan-moon.github.io/2020/05/21/about-cors/

 

CORS는 왜 이렇게 우리를 힘들게 하는걸까?

이번 포스팅에서는 웹 개발자라면 한번쯤은 얻어맞아 봤을 법한 정책에 대한 이야기를 해보려고 한다. 사실 웹 개발을 하다보면 CORS 정책 위반으로 인해 에러가 발생하는 상황은 굉장히 흔해서

evan-moon.github.io

 

 


자바에서 웹 크롤링 하기 위한 라이브러리

 

https://mvnrepository.com/artifact/org.jsoup/jsoup/1.14.3

 

package app;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.net.URL;
import java.sql.PreparedStatement;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import utils.DBConn;

/**
 * Scrapes a Musinsa category listing page with jsoup.
 *
 * <p>For every {@code .li_box} element on the page, the product's number, title,
 * info text, price and link are inserted into {@code TBL_MUSINSA_SAMPLE}, and the
 * thumbnail image is downloaded into a per-product directory on the local disk.
 */
public class HTMLParser {
	/**
	 * Fetches the category page and processes each product box in document order.
	 *
	 * @param args unused
	 * @throws Exception on any network, parsing, file or database failure
	 */
	public static void main(String[] args) throws Exception {
		Connection conn = Jsoup.connect("https://www.musinsa.com/category/014001");
		Document doc = conn.get();
		Elements elements = doc.select(".li_box");
		for (Element el : elements) {
			// NOTE(review): selectFirst yields null when nothing matches, so a box that
			// lacks any of these nodes aborts the whole run with a NullPointerException.
			String no = el.attr("data-no");
			String title = el.selectFirst(".item_title").text();
			String info = el.selectFirst(".list_info").text();
			String price = el.selectFirst(".price").text();
			String link = el.selectFirst(".list_info a").attr("href");
			Element img = el.selectFirst(".list_img img");

			Map<String, String> map = new HashMap<String, String>();
			map.put("no", no);
			map.put("title", title);
			map.put("info", info);
			map.put("price", price);
			map.put("link", link);
			System.out.println(map);
			saveDB(map);
			// The image URL is read from data-original rather than src.
			saveFile(no, img.attr("data-original"));
			System.out.println(no + "번 작업 끝");
		}
	}

	/**
	 * Downloads one image and stores it as {@code thumb.jpg} inside a directory
	 * named after the product number.
	 *
	 * @param no     product number; used as the directory name
	 * @param imgSrc absolute URL of the image to download
	 * @throws Exception if the URL is malformed or the download/write fails
	 */
	static void saveFile(String no, String imgSrc) throws Exception {
		URL url = new URL(imgSrc);
		// try-with-resources: the network stream used to be left open, and the file
		// stream leaked whenever the copy loop threw.
		try (BufferedInputStream bis = new BufferedInputStream(url.openStream())) {
			File dir = new File("D:\\mushinsa", no);
			if (!dir.exists()) {
				dir.mkdirs();
			}

			File file = new File(dir, "thumb.jpg");
			try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file))) {
				// Copy in blocks instead of one byte per call.
				byte[] buffer = new byte[8192];
				int read;
				while ((read = bis.read(buffer)) != -1) {
					bos.write(buffer, 0, read);
				}
			}
		}
	}

	/**
	 * Inserts one scraped product into {@code TBL_MUSINSA_SAMPLE}.
	 *
	 * <p>The INSERT has no column list, so the bind order below must match the
	 * table's column order.
	 *
	 * @param map scraped values under the keys no, title, info, price and link
	 * @throws Exception if obtaining the connection or executing the statement fails
	 */
	static void saveDB(Map<String, String> map) throws Exception{
		// The statement is now closed even when executeUpdate throws.
		// NOTE(review): the connection itself is left open, as before; presumably
		// DBConn hands out a shared connection -- confirm against DBConn.
		try (PreparedStatement pstmt = DBConn.getConnection().prepareStatement(
				"INSERT INTO TBL_MUSINSA_SAMPLE VALUES(?, ?, ?, ?, ?)")) {
			int idx = 1;
			pstmt.setString(idx++, map.get("no"));
			pstmt.setString(idx++, map.get("title"));
			pstmt.setString(idx++, map.get("info"));
			pstmt.setString(idx++, map.get("price"));
			pstmt.setString(idx++, map.get("link"));
			pstmt.executeUpdate();
		}
	}

	/**
	 * Earlier, jsoup-free approach: dumps the raw HTML of the category page to
	 * standard output and to a local file, line by line.
	 *
	 * @throws Exception if the page cannot be read or the file cannot be written
	 */
	static void doOldParsing() throws Exception {
		String urlStr = "https://www.musinsa.com/category/014001";
		URL url = new URL(urlStr);
		// NOTE(review): reader and writer both use the platform default charset, as
		// in the original; verify that it matches the page's encoding.
		try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
				BufferedWriter bw = new BufferedWriter(new FileWriter("무신사.html"))) {
			String s = null;
			while ((s = br.readLine()) != null) {
				System.out.println(s);
				bw.write(s);
				bw.newLine();
			}
		}
	}
}
-- Recreates the sample table that HTMLParser.saveDB inserts into.
-- NOTE(review): the DROP presumably errors on the very first run, when the
-- table does not exist yet; that error can be ignored.
DROP TABLE TBL_MUSINSA_SAMPLE;

-- Column order matters: saveDB uses INSERT ... VALUES(?, ?, ?, ?, ?) without a
-- column list and binds no, title, info, price, link in exactly this order.
CREATE TABLE TBL_MUSINSA_SAMPLE (
    NO NUMBER PRIMARY KEY,
    TITLE VARCHAR2(2000),
    INFO VARCHAR2(4000),
    PRICE VARCHAR2(2000),
    -- VARCHAR2 for consistency with the other text columns.
    LINK VARCHAR2(2000)
);

-- Quick check of the scraped rows.
SELECT * FROM TBL_MUSINSA_SAMPLE;


오늘자 불태운 로그인 만들기

 

juso.go.kr에서 도로명주소 검색 API와 팝업 API를 가져와서 작업했습니다.

 

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<!DOCTYPE html>
<html lang="ko">
    <head>
    	<jsp:include page="../common/head.jsp" />
    </head>
    <body class="bg-primary">
        <div id="layoutAuthentication">
            <div id="layoutAuthentication_content">
                <main>
                    <div class="container">
                        <div class="row justify-content-center">
                            <div class="col-lg-7">
                                <div class="card shadow-lg border-0 rounded-lg mt-5">
                                    <div class="card-header"><h3 class="text-center font-weight-light my-4">Create Account</h3></div>
                                    <div class="card-body">
                                        <form method="post">
                                            <div class="form-floating mb-3 input-group">
                                                <input class="form-control" id="id" type="text" name="id" placeholder="enter "/>
                                                <label for="id">ID</label>
                                                <button class="btn btn-success" type="button">ID 중복 체크</button>
                                            </div>
                                            <div class="row mb-3">
                                                <div class="col-md-6">
                                                    <div class="form-floating mb-3 mb-md-0">
                                                        <input class="form-control" id="inputPassword" name="pw" type="password" placeholder="Create a password" />
                                                        <label for="inputPassword">Password</label>
                                                    </div>
                                                </div>
                                                <div class="col-md-6">
                                                    <div class="form-floating mb-3 mb-md-0">
                                                        <input class="form-control" id="inputPasswordConfirm" name="pw2" type="password" placeholder="Confirm password" />
                                                        <label for="inputPasswordConfirm">Confirm Password</label>
                                                    </div>
                                                </div>
                                            </div>
                                            <div class="form-floating mb-3">
                                                <input class="form-control" id="name" name="name" type="text" placeholder="enter name" />
                                                <label for="name">name</label>
                                            </div>
                                            <div class="form-floating mb-3 input-group">
                                                <input class="form-control" id="email" name="email" type="text" placeholder="enter email" />
                                                <label for="email">email</label>
                                                <button class="btn btn-danger" type="button">인증메일발송</button>
                                            </div>
                                            <div class="form-floating mb-3">
                                                <input class="form-control" id="code" name="code" type="text" placeholder="인증 코드" />
                                                <!-- <label for="code">이메일 중복 체크</label> -->
                                                <label for="code">인증 코드</label>
                                            </div>
                                            <hr>
                                            <button type="button" id="btnSearchAddr" class="mb-3 btn btn-sm btn-secondary">주소검색</button>
                                            <div class="row mb-3">
                                                <div class="col-md-4">
                                                    <div class=" mb-3 mb-md-0">
                                                        <input class="form-control" id="addr1" name="addr1" readonly/>
                                                    </div>
                                                </div>
                                                <div class="col-md-4">
                                                    <div class=" mb-3 mb-md-0">
                                                        <input class="form-control" id="addr2" name="addr2" readonly />
                                                    </div>
                                                </div>
                                                <div class="col-md-4">
                                                    <div class=" mb-3 mb-md-0">
                                                        <input class="form-control" id="addr3" name="addr3" readonly />
                                                    </div>
                                                </div>
                                            </div>
                                            <div class=" mb-3">
                                                <input class="form-control" id="addr" name="addr" type="text" placeholder="enter addr" readonly/>
                                            </div>
                                            <div class="mt-4 mb-0">
                                                <div class="d-grid"><button class="btn btn-primary btn-block" >Create Account</button></div>
                                            </div>
                                        </form>
                                    </div>
                                    <div class="card-footer text-center py-3">
                                        <div class="small"><a href="login.html">Have an account? Go to login</a></div>
                                    </div>
                                </div>
                            </div>
                        </div>
                    </div>
                </main>
            </div>
            <div id="layoutAuthentication_footer">
                <jsp:include page="../common/footer.jsp"/>
            </div>
        </div>
        <script>
        // Once the DOM is ready, make the address-search button open the juso popup page.
        $(function(){
        	$("#btnSearchAddr").click(function(){
        		// The window handle is not needed here: the popup reports back by
        		// calling jusoCallBack on this window through its opener reference.
        		window.open("${pageContext.request.contextPath}/juso","pop","width=570,height=420, scrollbars=yes, resizable=yes");
        	});
        });
        /**
         * Receives the address chosen in the juso popup and copies the parts this
         * sign-up form uses into its read-only inputs.
         *
         * The popup calls this function positionally with all 25 values, so the
         * parameter list must keep this exact order even though only four values
         * are used here:
         *   siNm       -> #addr1
         *   sggNm      -> #addr2
         *   emdNm      -> #addr3
         *   addrDetail -> #addr
         *
         * The vendor sample also wrote every value into document.form.<field>.
         * This page's form has no name="form" and none of those fields, so the
         * first such write threw a TypeError, which also aborted the popup's own
         * script before it could close itself. Those writes were removed.
         */
        function jusoCallBack(roadFullAddr,roadAddrPart1,addrDetail,roadAddrPart2,engAddr, jibunAddr, zipNo, admCd, rnMgtSn, bdMgtSn,detBdNmList,bdNm,bdKdcd,siNm,sggNm,emdNm,liNm,rn,udrtYn,buldMnnm,buldSlno,mtYn,lnbrMnnm,lnbrSlno,emdNo){
			$("#addr1").val(siNm);
			$("#addr2").val(sggNm);
			$("#addr3").val(emdNm);
			$("#addr").val(addrDetail);
        }
        </script>
    </body>
</html>

 

<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
<%!
	/**
	 * Escapes a value so it can be embedded inside a double-quoted JavaScript
	 * string literal within an inline script element.
	 *
	 * Request parameters are attacker-controllable; written out raw, a value
	 * containing a quote or a closing script tag could break out of the literal
	 * and run arbitrary script in this page. A null value is rendered as the
	 * text "null", which is what a bare JSP expression printed before, so the
	 * values seen by the JavaScript below are unchanged for ordinary input.
	 */
	private static String jsEscape(String value) {
		String text = String.valueOf(value);
		StringBuilder out = new StringBuilder(text.length() + 16);
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			switch (c) {
			case '\\': out.append("\\\\"); break;
			case '"':  out.append("\\\""); break;
			case '\'': out.append("\\'"); break;
			case '\n': out.append("\\n"); break;
			case '\r': out.append("\\r"); break;
			case '\t': out.append("\\t"); break;
			default:
				// Control characters, HTML-significant characters and the two
				// Unicode line separators are emitted as numeric escapes so they
				// can neither end the literal nor close the script element.
				if (c < 0x20 || c == '<' || c == '>' || c == '&' || c == 0x2028 || c == 0x2029) {
					out.append(String.format("\\u%04x", (int) c));
				} else {
					out.append(c);
				}
			}
		}
		return out.toString();
	}
%>
<% 
	//request.setCharacterEncoding("UTF-8");  // uncomment if Korean text arrives garbled
	//request.setCharacterEncoding("EUC-KR");  // use when this system's encoding is EUC-KR
	// inputYn is compared against "Y" in init(): any other value means the address
	// has not been returned yet.
	String inputYn = request.getParameter("inputYn"); 
	String roadFullAddr = request.getParameter("roadFullAddr"); 
	String roadAddrPart1 = request.getParameter("roadAddrPart1"); 
	String roadAddrPart2 = request.getParameter("roadAddrPart2"); 
	String engAddr = request.getParameter("engAddr"); 
	String jibunAddr = request.getParameter("jibunAddr"); 
	String zipNo = request.getParameter("zipNo"); 
	String addrDetail = request.getParameter("addrDetail"); 
	String admCd    = request.getParameter("admCd");
	String rnMgtSn = request.getParameter("rnMgtSn");
	String bdMgtSn  = request.getParameter("bdMgtSn");
	String detBdNmList  = request.getParameter("detBdNmList");	
	/** Added in February 2017 **/
	String bdNm  = request.getParameter("bdNm");
	String bdKdcd  = request.getParameter("bdKdcd");
	String siNm  = request.getParameter("siNm");
	String sggNm  = request.getParameter("sggNm");
	String emdNm  = request.getParameter("emdNm");
	String liNm  = request.getParameter("liNm");
	String rn  = request.getParameter("rn");
	String udrtYn  = request.getParameter("udrtYn");
	String buldMnnm  = request.getParameter("buldMnnm");
	String buldSlno  = request.getParameter("buldSlno");
	String mtYn  = request.getParameter("mtYn");
	String lnbrMnnm  = request.getParameter("lnbrMnnm");
	String lnbrSlno  = request.getParameter("lnbrSlno");
	/** Added in March 2017 **/
	String emdNo  = request.getParameter("emdNo");

%>
</head>
<script language="javascript">
// If opener-related errors occur, uncomment the line below and enter your own domain.
// (The same change must be applied to the address-input page source.)
//document.domain = "abc.go.kr";

/*
		Calling the popup API during penetration testing may get your IP blocked.
		Exclude the address popup API from such tests.
*/

/**
 * Runs on page load and handles both visits to this page:
 *  - inputYn is not "Y": post the approval key, this page's URL and the result
 *    type to the juso.go.kr address-search page.
 *  - inputYn is "Y": hand every received address field to the opener window's
 *    jusoCallBack and close this popup.
 */
function init(){
	var url = location.href;
	var confmKey = "U01TX0FVVEgyMDIyMDMyNTEwMjIzMjExMjM4ODA=";
	var resultType = "4"; // What the search result screen shows. 1: road name, 2: road name + lot number + details (related lot numbers, local community center), 3: road name + details (detailed building names), 4: road name + lot number + details (related lot numbers, local community center, detailed building names)
	var inputYn= "<%=jsEscape(inputYn)%>";
	if(inputYn != "Y"){
		document.form.confmKey.value = confmKey;
		document.form.returnUrl.value = url;
		document.form.resultType.value = resultType;
		document.form.action="https://www.juso.go.kr/addrlink/addrLinkUrl.do"; // internet network
		//document.form.action="https://www.juso.go.kr/addrlink/addrMobileLinkUrl.do"; // mobile web, internet network
		document.form.submit();
	}else{
		// Every value is request-supplied, so each one is escaped before being
		// placed inside a JavaScript string literal.
		opener.jusoCallBack("<%=jsEscape(roadFullAddr)%>","<%=jsEscape(roadAddrPart1)%>","<%=jsEscape(addrDetail)%>","<%=jsEscape(roadAddrPart2)%>","<%=jsEscape(engAddr)%>","<%=jsEscape(jibunAddr)%>","<%=jsEscape(zipNo)%>", "<%=jsEscape(admCd)%>", "<%=jsEscape(rnMgtSn)%>", "<%=jsEscape(bdMgtSn)%>", "<%=jsEscape(detBdNmList)%>", "<%=jsEscape(bdNm)%>", "<%=jsEscape(bdKdcd)%>", "<%=jsEscape(siNm)%>", "<%=jsEscape(sggNm)%>", "<%=jsEscape(emdNm)%>", "<%=jsEscape(liNm)%>", "<%=jsEscape(rn)%>", "<%=jsEscape(udrtYn)%>", "<%=jsEscape(buldMnnm)%>", "<%=jsEscape(buldSlno)%>", "<%=jsEscape(mtYn)%>", "<%=jsEscape(lnbrMnnm)%>", "<%=jsEscape(lnbrSlno)%>", "<%=jsEscape(emdNo)%>");
		window.close();
		}
}
</script>
<body onload="init();">
	<form id="form" name="form" method="post">
		<input type="hidden" id="confmKey" name="confmKey" value=""/>
		<input type="hidden" id="returnUrl" name="returnUrl" value=""/>
		<input type="hidden" id="resultType" name="resultType" value=""/>
		<!-- 해당시스템의 인코딩타입이 EUC-KR일경우에만 추가 START-->
		<!-- 
		<input type="hidden" id="encodingType" name="encodingType" value="EUC-KR"/>
		 -->
		<!-- 해당시스템의 인코딩타입이 EUC-KR일경우에만 추가 END-->
	</form>
</body>
</html>

댓글