trybeetle

take it slow!!

google imageをscraping


Posted on Oct. 17, 2018, 2:12 a.m.



SeleniumでScraping

Seleniumを使用して、Google画像検索の結果をスクレイピングし、画像を保存します。


#!/usr/bin/env python
import os
import time
from urllib import request
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementNotVisibleException

def saveimg(search_word, num_scroll):
    """Save Google Images result thumbnails for ``search_word`` to disk.

    Starts Chrome through Selenium, submits ``search_word`` on
    https://images.google.com/, scrolls the result page, and then downloads
    the ``src`` of every element with class ``rg_ic`` to
    ``<search_word>/<search_word><index>.jpg``. The browser is always shut
    down before returning, even if a step raises.

    Args:
        search_word: Query typed into the search box; also used as the
            output directory name and as the file-name prefix.
        num_scroll: Number of scroll rounds. Each round scrolls the window
            five times and then tries to click the element with id ``smb``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get('https://images.google.com/')
        # 'gLFyf' is presumably the class of the search input -- verify
        # against the current page markup.
        elem = driver.find_element_by_class_name('gLFyf')
        elem.clear()
        elem.send_keys(search_word)
        elem.send_keys(Keys.RETURN)

        for _ in range(num_scroll):
            for _ in range(5):
                driver.execute_script('window.scrollBy(0,10000)')
                # Pause so the page can react to the scroll.
                time.sleep(1)
            try:
                # 'smb' looks like the "show more results" button -- TODO
                # confirm. It is not always visible, hence the except.
                driver.find_element_by_id('smb').click()
                time.sleep(1)
            except ElementNotVisibleException:
                pass
        time.sleep(1)

        os.makedirs(search_word, exist_ok=True)
        imgs = driver.find_elements_by_class_name('rg_ic')
        print('Number of Images: ' + str(len(imgs)))
        for count, img in enumerate(imgs):
            src = img.get_attribute('src')
            # Elements without a usable src cannot be downloaded; skip them.
            # The index still advances, so file numbers may have gaps.
            if not src:
                continue
            path = '{0}/{1}{2}.jpg'.format(search_word, search_word, count)
            request.urlretrieve(src, path)
    finally:
        # Always release the browser and its driver process.
        driver.quit()


if __name__ == '__main__':
    # Example run: three scroll rounds for the query 'dog'.
    search_word, num_scroll = 'dog', 3
    saveimg(search_word=search_word, num_scroll=num_scroll)

Category:ML
Tag: ML python selenium
Oct. 17, 2018, 2:12 a.m.

Comments