Thank you, Nomad Coders. ^^
#2.6 Extracting Titles https://academy.nomadcoders.co/courses/681401/lectures/12171971
import requests
from bs4 import BeautifulSoup

LIMIT = 50
URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"


def extract_indeed_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "pagination"})
    links = pagination.find_all('a')
    pages = []
    for link in links[:-1]:  # skip the last link ("Next"), which is not a page number
        pages.append(int(link.string))
    max_page = pages[-1]
    return max_page


def extract_indeed_jobs(last_page):
    jobs = []
    for page in range(last_page):
        result = requests.get(f"{URL}&start={page*LIMIT}")
        soup = BeautifulSoup(result.text, "html.parser")
        results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
        print(results)
    return jobs
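For reference, these two functions are meant to work together: extract_indeed_pages() finds the last page number and extract_indeed_jobs() then requests each page. A minimal sketch of how they might be called (this wiring is my own assumption, not code from the lecture notes above):

# Hypothetical call site (my own sketch):
last_indeed_page = extract_indeed_pages()
indeed_jobs = extract_indeed_jobs(last_indeed_page)
print(indeed_jobs)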
<Note>
If you run this, it prints the results for every one of the 20 pages, which is an enormous amount of output. Better not to run it unless you really want to... haha
# So I modified it to print only one page...
import requests
from bs4 import BeautifulSoup

LIMIT = 50
URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"


def extract_indeed_pages():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pagination = soup.find("div", {"class": "pagination"})
    links = pagination.find_all('a')
    pages = []
    for link in links[:-1]:
        pages.append(int(link.string))
    max_page = pages[-1]
    return max_page


def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")  # changed from start={page*LIMIT}
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    print(results)
    return jobs
<Note: only one page's worth of output comes out ^^>
# results is a list of soup (tag) objects. Let's print just the title part...
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        print(result.find_all("div", {"class": "title"}))
    return jobs
<Note: only the parts containing the title are printed.>
# results is a list of soup (tag) objects. Let's assign the title div to a variable and print its anchor a...
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find_all("div", {"class": "title"})
        print(title.find("a"))
    return jobs
# Changed it to title = result.find("div", {"class": "title"}) and printed again, because the next step throws an error ^^
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find("div", {"class": "title"})
        print(title.find("a"))
    return jobs
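For reference, the reason find_all gets in the way here: find_all() returns a ResultSet (essentially a list of tags), which has no find() method of its own, while find() returns a single Tag. A minimal sketch of the difference, using a made-up HTML snippet (not real Indeed markup):

from bs4 import BeautifulSoup

# Made-up HTML just for demonstration.
html = '<div class="title"><a title="Python Developer">Python Developer</a></div>'
soup = BeautifulSoup(html, "html.parser")

tag = soup.find("div", {"class": "title"})       # a single Tag (or None if not found)
tags = soup.find_all("div", {"class": "title"})  # a ResultSet, i.e. a list of Tags

print(tag.find("a")["title"])      # works: a Tag has find()
print(tags[0].find("a")["title"])  # works: index into the list first
# tags.find("a")                   # AttributeError: a ResultSet has no find()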
# Now pulling out the ["title"] attribute. Careful: doing this on the result of find_all throws an error ㅠㅠ
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find("div", {"class": "title"})  # careful: using find_all here throws an error ㅠㅠ
        anchor = title.find("a")["title"]
        print(anchor)
    return jobs
# Let's try shortening the code to one line
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find("div", {"class": "title"}).title.find("a")["title"]  # doing it like this throws an error ^^
        print(title)
    return jobs
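Why does that one-liner blow up? My own explanation, not from the lecture: on a bs4 Tag, the attribute access .title is shorthand for .find("title"), so it looks for a <title> tag inside the div, finds none, returns None, and the following .find("a") then fails on None. A tiny sketch with the same made-up HTML as above:

from bs4 import BeautifulSoup

# Made-up HTML just for demonstration.
html = '<div class="title"><a title="Python Developer">Python Developer</a></div>'
soup = BeautifulSoup(html, "html.parser")

div = soup.find("div", {"class": "title"})
print(div.title)               # None: div.title means div.find("title"), and there is no <title> tag
print(div.find("a")["title"])  # "Python Developer": this is what we actually want
# div.title.find("a")          # AttributeError: 'NoneType' object has no attribute 'find'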
# Try again
def extract_indeed_jobs(last_page):
    jobs = []
    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find("div", {"class": "title"}).find("a")["title"]
        print(title)
    return jobs
<Output: OK>
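Since extract_indeed_jobs() still returns an empty jobs list, a natural next step is to collect the titles instead of printing them. A minimal sketch of that change (my own extension, not part of this lecture's code):

def extract_indeed_jobs(last_page):
    jobs = []
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")
    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for result in results:
        title = result.find("div", {"class": "title"}).find("a")["title"]
        jobs.append(title)  # collect the title instead of printing it
    return jobs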