문제상황
개발 환경은 eGovFramework에서 ibatis XML을 사용해 query를 날리고, DB는 CUBRID이다.
기존 쿼리는 아래와 같고, 시간은 약 200초로 응답이 너무 오래 걸려 기능 개선 요청이 들어온 상황이다. table명, column명은 맥락과 비슷하게 조금 수정했지만 전체 query의 구조는 동일하게 작성했다.
<select id="commonDAO.selectSubjectCdList" parameterClass="searchVO" resultClass="trainDetailVO">
<![CDATA[
SELECT subject_cd, subject_cd_nm
FROM class_detail ]]>
<isNotEqual property="school_cd" compareValue="SCHOOL0"> <!-- 학교 종류 0번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL1"> <!-- 학교 종류 1번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL2"> <!-- 학교 종류 2번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL3"> <!-- 학교 종류 3번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL4"> <!-- 학교 종류 4번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL5"> <!-- 학교 종류 5번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL6"> <!-- 학교 종류 6번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL7"> <!-- 학교 종류 7번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL8"> <!-- 학교 종류 8번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL9"> <!-- 학교 종류 9번 -->
<![CDATA[ AND class_cd = '09' ]]>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
<isEqual property="school_cd" compareValue="SCHOOL0"> <!-- 학교 종류 0번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL1"> <!-- 학교 종류 1번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL2"> <!-- 학교 종류 2번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL3"> <!-- 학교 종류 3번 -->
<![CDATA[ AND class_cd = '33' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL4"> <!-- 학교 종류 4번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL5"> <!-- 학교 종류 5번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL6"> <!-- 학교 종류 6번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL7"> <!-- 학교 종류 7번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL8"> <!-- 학교 종류 8번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL9"> <!-- 학교 종류 9번 -->
<![CDATA[ AND class_cd = '13' ]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT0"> <!-- 학과 종류 0번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND (instructor_id = #current_user_id# or assistant_instructor_id = #current_user_id#)
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT1"> <!-- 학과 종류 1번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND instructor_id = #current_user_id#
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="qry_is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT2"> <!-- 학과 종류 2번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND (instructor_id = #current_user_id# or assistant_instructor_id = #current_user_id#)
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT3"> <!-- 학과 종류 3번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND (instructor_id = #current_user_id# or assistant_instructor_id = #current_user_id#)
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT4"> <!-- 학과 종류 4번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND teaching_assistant_id = #current_user_id#
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT5"> <!-- 학과 종류 5번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND (assistant_instructor_id = #current_user_id# OR teaching_assistant_id = #current_user_id#)
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT6"> <!-- 학과 종류 6번 -->
<![CDATA[
AND EXISTS (
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND instructor_id = #current_user_id#
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd ]]>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[ AND is_train_curriculum = 'Y' ]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[ AND is_train_curriculum = 'N' ]]>
</isEqual>
<![CDATA[ )
)
]]>
</isEqual>
</select>
분석
일단 코드를 보면 개판이다. 학과 종류에 따라 subject에서 무슨 값을 가져올지가 다른데, 이 로직 하나 때문에 나머지 subquery 전체가 복사+붙여넣기이다. 따라서, 이 부분을 해결할 수 있는 방법을 고민하는 것이 첫 번째 문제이다.
두 번째로, 실행계획이다. 이 query를 아주 간단하게 표현하면 아래와 같으며, WHERE EXISTS문 subquery가 2번 이루어진다.
SELECT *
FROM class_detail
WHERE EXISTS(
SELECT 1
FROM subject
WHERE EXISTS(
SELECT 1
FROM education_schedule
)
)
실행 계획을 살펴봤더니 각 subquery가 seq scan로 도는 것을 보았다. 최적화 해 줄만 한데... 그렇지 않으니까 고쳐야 한다. 일단 원인 분석을 좀 더 해 보면
첫 번째 table인 class_detail의 row 개수는 약 8만개이다.
첫 번째 subquery는 아래와 같고, subject의 row 개수는 약 5천개이다. subject에서는 index에 없는 instructor_id, assistant_instructor_id, teaching_assistant_id, school_cd로 검색을 하고 있다.
SELECT 1 FROM subject WHERE subject_cd = subject_cd AND (instructor_id = #current_user_id# or assistant_instructor_id = #current_user_id#)
AND school_cd = #school_cd#
AND EXISTS (SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd)
두 번째 subquery는 다음과 같고, 하나의 조건에 대해 row 개수는 약 3천개이다. 마찬가지로 education_schedule에서는 year, school_cd, type_cd 등 index에 없는 값으로 검색을 한다.
SELECT 1 FROM education_schedule b WHERE subject.year = b.year AND subject.school_cd = b.school_cd
AND subject.type_cd = b.type_cd AND subject.curriculum_cd = b.curriculum_cd
AND subject.subject_cd = b.subject_cd
이를 nested loop로 돌아버리니.. 모든 데이터를 살펴보며 값을 찾기 때문에 당연히 시간이 오래 걸린다. 검색 조건이 없다고 생각하면 80,000 * 5,000 * 3,000 = 1,200,000,000,000. 1조 2천억이다.(...) 검색 조건이 있으니까 훨씬 준다고 해도 class_detail은 검색 조건이 하나 뿐이니 80,000은 변하지 않고 줄어봤자 subject나 education_schedule이 주는데... 결과값이 3천이니 3000정도로 줄였다고 해도 240,000,000으로 2억 4천이다.
요약
1) 복사+붙여넣기 부분을 없앨 수 있다면 없애야 한다.
2) 결과값은 약 3천개인데도 불구하고 nested loop로 3개의 table을 모두 순회하기 때문에 시간이 오래 걸린다. 각 table size는 10만보다 작기 때문에 다른 방법을 찾아보는 게 좋을 것이다.
해결 : JOIN
JOIN을 써서 해결했다. 기존에는 상위 query에서 사용하는 값을 subquery에서 비교하는 방식을 택했는데, 굳이 이럴 필요 없이 JOIN으로 해당 값이 같은 것을 모두 가져오고, filterling하는 식으로 짜면 될 것 같았다.
JOIN을 사용해도 괜찮을 거라 판단한 이유는, 각 table size가 80,000, 5,000, 3,000으로 적은 편기 때문에 JOIN에 대한 부하가 많이 걸리지 않으리라 생각했다. 실제로 DB에 JOIN query를 날렸을 때는 약 1초 내외로 작동하는 것을 확인했고 이 방식으로 query를 작성했다.
매우 간단하게 표현하면 아래와 같다. 2개의 subquery로 나타난 부분을 JOIN + WHERE로 필터링하는 식으로 작성했고, 기존에 subquery가 2개였기 때문에 여기서도 JOIN + WHERE가 2개인 것을 볼 수 있다.
SELECT *
FROM class_detail d JOIN
(
SELECT * FROM subject s JOIN education_schedule e
ON [JOIN CONDITION]
WHERE [WHERE CONDITION]
) r
ON [JOIN CONDITION]
WHERE [WHERE CONDITION]
tunning한 결과는 아래와 같다. 동작하고 있는 서버를 고치는 것이었기 때문에 기존 로직은 최대한 건드리지 않고 작성했다. 솔직히 isEqual과 isNotEqual이 너무 많아서 그렇게 깔끔하다고는 표현할 수 없겠으나 원래 목표인 중복된 부분 제거, 시간 단축 2개의 목표는 달성했기 때문에 이정도로 작성했다.
<!-- after tunning -->
<select id="commonDAO.selectSubjectCdList" parameterClass="searchVO" resultClass="trainDetailVO">
<![CDATA[
SELECT subject_cd, subject_cd_nm
FROM class_detail d JOIN
(SELECT DISTINCT s.subject_cd FROM subject s JOIN education_schedule e
ON s.year = e.year AND s.school_cd = e.school_cd AND s.type_cd = e.type_cd AND s.curriculum_cd = e.curriculum_cd AND s.subject_cd = e.subject_cd
WHERE s.school_cd = #school_cd# ]]>
<isEqual property="department" compareValue="DEPARTMENT0"> <!-- 학과 종류 0번 -->
<![CDATA[
AND (s.instructor_id = #current_user_id# OR s.assistant_instructor_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT1"> <!-- 학과 종류 1번 -->
<![CDATA[
AND (s.instructor_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT2"> <!-- 학과 종류 2번 -->
<![CDATA[
AND (s.instructor_id = #current_user_id# OR s.assistant_instructor_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT3"> <!-- 학과 종류 3번 -->
<![CDATA[
AND (s.instructor_id = #current_user_id# OR s.assistant_instructor_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT4"> <!-- 학과 종류 4번 -->
<![CDATA[
AND (s.teaching_assistant_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT5"> <!-- 학과 종류 5번 -->
<![CDATA[
AND (s.assistant_instructor_id = #current_user_id# OR s.teaching_assistant_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="department" compareValue="DEPARTMENT6"> <!-- 학과 종류 6번 -->
<![CDATA[
AND (s.instructor_id = #current_user_id#)
]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="Y">
<![CDATA[
AND (e.is_train_curriculum = 'Y')
]]>
</isEqual>
<isEqual property="is_train_curriculum" compareValue="N">
<![CDATA[
AND (e.is_train_curriculum = 'N')
]]>
</isEqual>
<![CDATA[
) r
ON d.cd = r.subject_cd
WHERE 1 ]]>
<isNotEqual property="school_cd" compareValue="SCHOOL0"> <!-- 학교 종류 0번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL1"> <!-- 학교 종류 1번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL2"> <!-- 학교 종류 2번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL3"> <!-- 학교 종류 3번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL4"> <!-- 학교 종류 4번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL5"> <!-- 학교 종류 5번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL6"> <!-- 학교 종류 6번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL7"> <!-- 학교 종류 7번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL8"> <!-- 학교 종류 8번 -->
<isNotEqual property="school_cd" compareValue="SCHOOL9"> <!-- 학교 종류 9번 -->
<![CDATA[ AND d.class_cd = '09' ]]>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
</isNotEqual>
<isEqual property="school_cd" compareValue="SCHOOL0"> <!-- 학교 종류 0번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL1"> <!-- 학교 종류 1번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL2"> <!-- 학교 종류 2번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL3"> <!-- 학교 종류 3번 -->
<![CDATA[ AND d.class_cd = '33' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL4"> <!-- 학교 종류 4번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL5"> <!-- 학교 종류 5번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL6"> <!-- 학교 종류 6번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL7"> <!-- 학교 종류 7번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL8"> <!-- 학교 종류 8번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
<isEqual property="school_cd" compareValue="SCHOOL9"> <!-- 학교 종류 9번 -->
<![CDATA[ AND d.class_cd = '13' ]]>
</isEqual>
</select>
'Development > SQL' 카테고리의 다른 글
[DB] MySQL 등 ERD에서 id를 long으로 두는 이유 (0) | 2022.10.04 |
---|