Skip to content

Commit

Permalink
Merge pull request #74 from GDSC-PKNU-21-22/feat/#59
Browse files Browse the repository at this point in the history
Feat/#59: 졸업요건 크롤링 후 db에 저장 & 엔드포인트 설정
  • Loading branch information
hwinkr authored Aug 16, 2023
2 parents d67320b + 3f35e6e commit 0d054a0
Show file tree
Hide file tree
Showing 8 changed files with 385 additions and 9 deletions.
20 changes: 20 additions & 0 deletions src/apis/graduation/controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import express, { Request, Response } from 'express';

import { getGraduationLink } from './service';

const router = express.Router();

/**
 * GET /?major=<department name>
 *
 * Looks up the graduation-requirements link stored for the given major and
 * returns it as JSON (the service resolves `null` when it has no row).
 *
 * Responses:
 *  - 200: the row returned by `getGraduationLink`
 *  - 400: the `major` query parameter is missing or is not a single string
 *  - 500: the lookup failed
 */
router.get('/', async (req: Request, res: Response) => {
  const major = req.query.major;
  // Express may parse a query value as a string, an array or a nested object;
  // only a plain string is a usable department name.
  if (typeof major !== 'string') {
    return res.status(400).json({ message: '전공을 선택해주세요!' });
  }

  try {
    const graduationLink = await getGraduationLink(major);
    return res.json(graduationLink);
  } catch (err) {
    console.log(err);
    // Always answer the client; otherwise the request hangs until it times out.
    return res.status(500).json({ message: '서버 오류가 발생했습니다.' });
  }
});

export default router;
31 changes: 31 additions & 0 deletions src/apis/graduation/service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import db from '@db/index';
import { QueryError, RowDataPacket } from 'mysql2';

// Row shape produced by the `SELECT department, link FROM graduation` query
// issued in getGraduationLink.
interface GraduationLink extends RowDataPacket {
// Value of the `department` column (the query filters on this column).
department: string;
// Value of the `link` column.
link: string;
}

/**
 * Fetches the graduation-requirements row stored for a department.
 *
 * @param major Department name matched against `graduation.department`.
 * @returns The first matching row, or `null` when no row matches.
 * @throws Rejects with the mysql2 `QueryError` when the query fails.
 */
export const getGraduationLink = async (
  major: string,
): Promise<GraduationLink | null> => {
  const getGraduationLinkQuery =
    'SELECT department, link FROM graduation WHERE department = ?';

  // db.query is callback-based, so it is wrapped in a Promise here.
  return new Promise((resolve, reject) => {
    db.query<GraduationLink[]>(
      getGraduationLinkQuery,
      [major],
      (err: QueryError | null, result) => {
        if (err) {
          reject(err);
          return;
        }
        // An empty result set is still a truthy array, so check its length;
        // otherwise `result[0]` would leak `undefined` instead of `null`.
        resolve(result && result.length > 0 ? result[0] : null);
      },
    );
  });
};
77 changes: 77 additions & 0 deletions src/constants/graduation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// String-to-string lookup table. In this file the keys are department names.
interface Keywords {
[key: string]: string;
}
// Per-department override of the text used to locate the graduation link.
// The crawler (src/crawling/graduationRequirementsCrawling.ts) searches for
// '졸업요건' by default and uses the value below instead when the department
// is listed here.
export const EXCEPTIONAL_GRADUATION_KEYWORDS: Keywords = {
법학과: '졸업자격요건',
영어영문학부: '졸업자격요건',
사학과: '졸업안내',
국제지역학부: '졸업규정',
국제통상학부: '졸업사정',
금속공학전공: '졸업 요건',
양식응용생명과학전공: '졸업 요건',
냉동공조공학전공: '졸업매뉴얼',
언론정보전공: '졸업 요건',
};

// Per-department override of the text used to locate the notice-board link.
// The crawler searches for '공지사항' by default and uses the value below
// instead when the department is listed here.
export const EXCEPTIONAL_SEARCH_KEYWORDS: Keywords = {
법학과: '졸업관련게시판',
경영학부: '졸업 안내',
조선해양시스템공학전공: '학부 규정',
언론정보전공: '전공공지사항',
글로벌자율전공학부: '공지사항(중요)',
};

// Hard-coded graduation links. For a department listed here the crawler
// returns the URL below as-is and performs no HTTP request.
export const EXCEPTIONAL_GRADUATION_LINKS: Keywords = {
해양수산경영학전공:
'https://mbe.pknu.ac.kr/view.do?no=504&idx=10414&view=view&pageIndex=1&sv=A&sw=%EC%A1%B8%EC%97%85%EC%9A%94%EA%B1%B4',
환경대기과학전공:
'https://icms.pknu.ac.kr/envatm/2268?action=view&no=9918655',
공간정보시스템공학전공:
'http://geoinfo.pknu.ac.kr//05piazza/08.php?idx=8346&p=1&key=subject&keyword=졸업요건&bbscode=cate0501&reCategory=&kind=view',
};

// Departments for which the crawler rewrites the first host label of the
// department URL to 'cms' before appending the href it found.
export const DOMAIN_TO_CMS_DEPARTMENTS = [
'사학과',
'일어일문학부',
'정치외교학과',
'기계시스템공학전공',
];

// Departments for which the crawler first follows the '자료실' link found on
// the department page and searches for the graduation link from there.
export const GRADUATION_IN_ARCHIVE = [
'식품공학전공',
'생물공학과',
'수해양산업교육과',
];

// Departments whose graduation post is searched for on the notice board using
// the '&pageIndex=1&view=list&sv=TITLE&sw=…' query format.
// The value is the substring the crawler replaces inside the notice URL:
// with '' for 재료공학전공 and with 'cms' for the other departments.
export const GRADUATION_IN_NOTICE_A: Keywords = {
화학공학과: 'chemeng',
생태공학전공: 'ecoeng',
재료공학전공: '/materials',
};

// Departments whose graduation post is searched for on the notice board using
// the '?pageIndex=1&searchCondition=title&searchKeyword=…' query format.
export const GRADUATION_IN_NOTICE_B = [
'법학과',
'경영학부',
'조선해양시스템공학전공',
'언론정보전공',
'글로벌자율전공학부',
'경제학과',
'화학과',
'고분자공학전공',
'산업경영공학전공',
'기술·데이터공학전공',
'안전공학전공',
'건축공학과',
'건축학전공',
'식품영양학전공',
'자원생물학전공',
'수산생명의학과',
'해양학전공',
'해양공학과',
'에너지자원공학과',
'해양스포츠전공',
'평생교육·상담학전공',
'경찰범죄심리학전공',
'기계조선에너지시스템공학전공',
'양식응용생명과학전공',
];
228 changes: 220 additions & 8 deletions src/crawling/graduationRequirementsCrawling.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,239 @@
import db from '@db/index';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { College } from 'src/@types/college';
import {
DOMAIN_TO_CMS_DEPARTMENTS,
EXCEPTIONAL_GRADUATION_KEYWORDS,
EXCEPTIONAL_GRADUATION_LINKS,
EXCEPTIONAL_SEARCH_KEYWORDS,
GRADUATION_IN_ARCHIVE,
GRADUATION_IN_NOTICE_A,
GRADUATION_IN_NOTICE_B,
} from 'src/constants/graduation';

export const graduationRequirementsCrawling = async (
// One crawl target, built by getDepartmentLinks from a `departments` row.
interface DepartmentItem {
// The row's departmentSubName, or its departmentName when the sub name is '-'.
departmentName: string;
// The row's departmentLink; used as the starting URL for crawling.
departmentLink: string;
}
// Resolves the text used to find the graduation-requirements link:
// the department-specific override when one exists, otherwise '졸업요건'.
const targetKeywordHandler = (departmentName: string) => {
  const hasOverride = Object.prototype.hasOwnProperty.call(
    EXCEPTIONAL_GRADUATION_KEYWORDS,
    departmentName,
  );
  return hasOverride
    ? EXCEPTIONAL_GRADUATION_KEYWORDS[departmentName]
    : '졸업요건';
};
// Resolves the text used to find the page that hosts the graduation post:
// the department-specific override when one exists, otherwise '공지사항'.
const searchKeywordHandler = (departmentName: string) => {
  const hasOverride = Object.prototype.hasOwnProperty.call(
    EXCEPTIONAL_SEARCH_KEYWORDS,
    departmentName,
  );
  return hasOverride ? EXCEPTIONAL_SEARCH_KEYWORDS[departmentName] : '공지사항';
};
// Builds the final graduation URL from the crawled page URL and the href that
// was found on it, applying the per-department domain exceptions.
const graduationDomainHandler = (
  departmentName: string,
  departmentLink: string,
  graduationLink: string,
) => {
  // Swap the first host label of the department URL for 'cms'.
  if (DOMAIN_TO_CMS_DEPARTMENTS.includes(departmentName)) {
    const [, afterScheme] = departmentLink.split('//');
    const [firstHostLabel] = afterScheme.split('.');
    return departmentLink.replace(firstHostLabel, 'cms') + graduationLink;
  }

  // The href is resolved against the fixed CMS origin.
  const isNoticeA = Object.prototype.hasOwnProperty.call(
    GRADUATION_IN_NOTICE_A,
    departmentName,
  );
  if (isNoticeA) {
    return 'https://cms.pknu.ac.kr' + graduationLink;
  }

  // Strip the archive page suffix before appending the href.
  if (departmentName === '식품공학전공') {
    return departmentLink.replace('/view.do?no=405', '') + graduationLink;
  }

  // Drop the search query string that was added to the notice URL.
  if (GRADUATION_IN_NOTICE_B.includes(departmentName)) {
    const [withoutQuery] = departmentLink.split('?');
    return withoutQuery + graduationLink;
  }

  // No exception applies: plain concatenation.
  return departmentLink + graduationLink;
};
// For departments whose graduation post lives on a notice board: fetches the
// department page and returns the department URL joined with the href of the
// first element that contains the search keyword and has an href.
// The result is left unassigned (undefined) when no such element exists.
const noticeLinkHandler = async (
  departmentName: string,
  departmentLink: string,
) => {
  const { data: html } = await axios.get(departmentLink);
  const $ = cheerio.load(html);

  const keyword = searchKeywordHandler(departmentName);
  const candidates = $(`:contains("${keyword}")`).toArray();

  let noticeLink: string;
  for (const candidate of candidates) {
    const href = $(candidate).attr('href');
    if (href !== undefined) {
      noticeLink = departmentLink + href;
      break;
    }
  }

  return noticeLink;
};
// Resolves the URL of the page on which the graduation-requirements crawl
// should start for a department.
//  - Default: the department page itself (trailing '/' removed).
//  - GRADUATION_IN_ARCHIVE: the '자료실' page linked from the department page.
//  - GRADUATION_IN_NOTICE_A / _B: the notice-board page with a title-search
//    query appended.
// Returns undefined when departmentLink is undefined or when the archive or
// notice link cannot be found on the department page.
const graduationLinkHandler = async (
  departmentName: string,
  departmentLink: string,
) => {
  if (departmentLink === undefined) return;
  if (departmentLink.endsWith('/')) {
    departmentLink = departmentLink.slice(0, -1);
  }

  const isInArchive = GRADUATION_IN_ARCHIVE.includes(departmentName);
  const isInNoticeA = Object.keys(GRADUATION_IN_NOTICE_A).includes(
    departmentName,
  );
  const isInNoticeB = GRADUATION_IN_NOTICE_B.includes(departmentName);

  if (!isInArchive && !isInNoticeA && !isInNoticeB) {
    return departmentLink;
  }

  if (isInArchive) {
    const response = await axios.get(departmentLink);
    const $ = cheerio.load(response.data);
    const targetName = '자료실';
    const targetElements = $(`:contains("${targetName}")`);

    let archiveLink: string;
    targetElements.each((index, element) => {
      const link = $(element).attr('href');
      if (link !== undefined) {
        archiveLink = departmentLink + link;
        return false; // stop at the first element that carries an href
      }
    });
    return archiveLink;
  }

  const noticeLink = await noticeLinkHandler(departmentName, departmentLink);
  // Without this guard a missing notice link would throw on `.replace` below
  // or be concatenated into the bogus URL "undefined?pageIndex=...".
  if (noticeLink === undefined) return;

  if (isInNoticeA) {
    const additionalLink_A = '&pageIndex=1&view=list&sv=TITLE&sw=졸업요건';
    // 재료공학전공 drops its path segment; the others swap theirs for 'cms'.
    const replacement = departmentName !== '재료공학전공' ? 'cms' : '';
    return (
      noticeLink.replace(GRADUATION_IN_NOTICE_A[departmentName], replacement) +
      additionalLink_A
    );
  }

  const additionalLink_B_1 =
    '?pageIndex=1&searchCondition=title&searchKeyword=졸업요건';
  const additionalLink_B_2 =
    '?pageIndex=1&searchCondition=title&searchKeyword=졸업+요건';

  // The numeric replacements below rewrite part of the notice URL for specific
  // departments. NOTE(review): they look like board ids — confirm.
  if (departmentName === '수산생명의학과') {
    return noticeLink.replace('4208', '4229') + additionalLink_B_1;
  }
  if (
    departmentName === '기술·데이터공학전공' ||
    departmentName === '산업경영공학전공'
  ) {
    return noticeLink.replace('1849', '721') + additionalLink_B_1;
  }
  if (departmentName === '양식응용생명과학전공') {
    return noticeLink + additionalLink_B_2;
  }
  return noticeLink + additionalLink_B_1;
};
// Main crawl routine for a single department.
// Resolves the page to crawl, fetches it, and returns the URL built from the
// first element that contains the graduation keyword and carries an href.
//
// Returns:
//  - undefined when departmentLink is undefined;
//  - the hard-coded URL for departments in EXCEPTIONAL_GRADUATION_LINKS;
//  - otherwise { department, link }, where link is '' when no start page or
//    no matching anchor could be found.
// HTTP errors raised by axios propagate to the caller.
const graduationRequirementsCrawling = async (
  departmentName: string,
  departmentLink: string,
): Promise<GraduationLink> => {
  if (departmentLink === undefined) return;
  if (departmentLink.endsWith('/')) {
    departmentLink = departmentLink.slice(0, -1);
  }

  if (Object.keys(EXCEPTIONAL_GRADUATION_LINKS).includes(departmentName)) {
    return {
      department: departmentName,
      link: EXCEPTIONAL_GRADUATION_LINKS[departmentName],
    };
  }

  departmentLink = await graduationLinkHandler(departmentName, departmentLink);
  // The start page could not be resolved (e.g. no archive link on the page);
  // report "not found" rather than calling axios with an undefined URL.
  if (departmentLink === undefined) {
    return {
      department: departmentName,
      link: '',
    };
  }

  const response = await axios.get(departmentLink);
  const $ = cheerio.load(response.data);

  const targetName = targetKeywordHandler(departmentName);
  const targetElements = $(`:contains("${targetName}")`);

  let graduationLink = '';
  targetElements.each((index, element) => {
    const link = $(element).attr('href');
    if (link !== undefined) {
      graduationLink = graduationDomainHandler(
        departmentName,
        departmentLink,
        link,
      );
      return false; // stop at the first element that carries an href
    }
  });

  return {
    department: departmentName,
    link: graduationLink,
  };
};

/**
 * Loads every row of the `departments` table and maps it to a crawl target.
 * The name used is `departmentSubName`, falling back to `departmentName` when
 * the sub name is '-'.
 *
 * @returns One item per row.
 * @throws Re-throws the query error after logging it, so the caller receives
 *   the real failure instead of iterating over `undefined`.
 */
const getDepartmentLinks = async (): Promise<DepartmentItem[]> => {
  const SELECT_QUERY = 'SELECT * FROM departments;';
  try {
    // db.query is callback-based, so it is wrapped in a Promise here.
    const queryResult = await new Promise<College[]>((resolve, reject) => {
      db.query(SELECT_QUERY, (error, results) => {
        if (error) {
          console.error('SELECT 오류:', error);
          reject(error);
        } else {
          resolve(results as College[]);
        }
      });
    });

    return queryResult.map((result) => ({
      departmentName:
        result.departmentSubName === '-'
          ? result.departmentName
          : result.departmentSubName,
      departmentLink: result.departmentLink,
    }));
  } catch (error) {
    console.error('에러 발생:', error);
    throw error;
  }
};

// Result of crawling one department.
interface GraduationLink {
// Department name the crawl was run for.
department: string;
// Graduation-requirements URL; the crawler initialises it to '' and leaves it
// empty when no matching anchor is found.
link: string;
}

// Crawls the graduation-requirements link of every department returned by
// getDepartmentLinks and collects the results in order.
// Departments are processed one at a time; each crawl is awaited before the
// next one starts.
export const crawlingGraudationLinks = async () => {
  const collected: GraduationLink[] = [];
  const departments = await getDepartmentLinks();

  for (const { departmentName, departmentLink } of departments) {
    const crawled: GraduationLink = await graduationRequirementsCrawling(
      departmentName,
      departmentLink,
    );
    collected.push(crawled);
  }

  return collected;
};
Loading

0 comments on commit 0d054a0

Please sign in to comment.