쿠팡 상품 크롤러

📜쿠팡 상품 크롤링

  • 쿠팡 검색 결과에서 상품 정보(브랜드명, 상품명, 가격, 상세페이지 링크) 크롤링

    • 조건 1. 100개까지만 상품 추출
    • 조건 2. 광고상품 표시
    • 조건 3. 엑셀 파일로 결과 저장
    from urllib.parse import urljoin
    
    import openpyxl
    import pyautogui
    import requests
    from bs4 import BeautifulSoup
    
    # Crawl Coupang search results for a user-supplied keyword, visit each
    # non-ad product's detail page, and store rank / brand / name / price /
    # link for up to MAX_PRODUCTS products in an Excel workbook.
    
    BASE_URL = "https://www.coupang.com"
    SEARCH_URL = BASE_URL + "/np/search"
    OUTPUT_FILE = 'coupang_result.xlsx'
    MAX_PRODUCTS = 100      # stop once this many products have been recorded
    REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled request hangs forever
    
    # Request headers are identical for every request, so build them once.
    coupang_header = {
        'Host': 'www.coupang.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    
    
    def _select_text(parsed, selector):
        """Return the text of the first element matching *selector*, or "" if none matches."""
        element = parsed.select_one(selector)
        return element.text if element is not None else ""
    
    
    keyword = pyautogui.prompt("검색어를 입력하세요 >> ")
    if not keyword:
        # prompt() returns None when the dialog is cancelled; do not search for "None".
        raise SystemExit("검색어가 입력되지 않았습니다.")
    
    # Workbook() takes no filename (its first positional parameter is the
    # write_only flag); the filename belongs to save(). Reuse the default
    # sheet so the file still contains exactly one sheet named after the keyword.
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = keyword
    ws.append(['순위','브랜드명','상품명','가격','상세페이지링크'])
    
    rank = 1
    
    for page in range(1,5):
        if rank > MAX_PRODUCTS:
            break
        print(page, "번째 페이지 입니다.")
    
        # Passing the query via params lets requests URL-encode the keyword
        # (spaces, '&', '#', non-ASCII characters).
        try:
            search_response = requests.get(
                SEARCH_URL,
                params={'q': keyword, 'page': page},
                headers=coupang_header,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as err:
            print(page, "번째 페이지 요청에 실패했습니다.", err)
            continue
        search_soup = BeautifulSoup(search_response.text, "html.parser")
    
        for link in search_soup.select("a.search-product-link"):
            # Ad products are reported on the console but not recorded.
            if link.select("span.ad-badge-text"):
                print("광고상품 입니다.")
                continue
    
            # urljoin handles root-relative hrefs without producing "//".
            sub_url = urljoin(BASE_URL, link.attrs['href'])
            try:
                detail_response = requests.get(
                    sub_url, headers=coupang_header, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException as err:
                print("상세페이지 요청에 실패했습니다.", sub_url, err)
                continue
            detail_soup = BeautifulSoup(detail_response.text, "html.parser")
    
            product_name = _select_text(detail_soup, "h2.prod-buy-header__title")
            if not product_name:
                # No title element (e.g. blocked or changed page): skip the
                # product instead of crashing on None.text.
                print("상품명을 찾을 수 없어 건너뜁니다.", sub_url)
                continue
    
            # Brand and price are optional on a product page; default to "".
            brand_name = _select_text(detail_soup, "a.prod-brand-name")
            product_price = _select_text(detail_soup, "span.total-price > strong")
    
            print(rank, brand_name, product_name, product_price)
            ws.append([rank, brand_name, product_name, product_price, sub_url])
            rank = rank + 1
            if rank > MAX_PRODUCTS:
                break
    
    wb.save(OUTPUT_FILE)