본문 바로가기

Language/파이썬

Building a Job Scrapper(5)

 

 

 

#2.8 Extracting Locations and Finishing up

 

 

 

# 반복될 수 있는 부분을 함수로 만들어 둔다.

def extract_job(html):
    """Extract the job title and company name from one Indeed result card.

    html: a BeautifulSoup Tag for a single "jobsearch-SerpJobCard" div
    (each `result` from the search-results loop).
    Returns a dict with 'title' and 'company' keys.
    """
    # The anchor inside the title div carries the full job title attribute.
    title = html.find("div", {"class": "title"}).find("a")["title"]

    company = html.find("span", {"class": "company"})
    company_anchor = company.find("a")

    # Some cards wrap the company name in a link, others in plain text.
    # Fix: reuse the anchor we already looked up instead of calling
    # company.find("a") a second time.
    if company_anchor is not None:
        company = str(company_anchor.string)
    else:
        company = str(company.string)
    company = company.strip()

    return {'title': title, 'company': company}



def extract_indeed_jobs(last_page):
    """Fetch the first Indeed results page and return a list of job dicts.

    last_page: total page count (not yet used here — only page 0 is
    fetched; the full pagination loop appears in the later revision).
    """
    jobs = []

    # for page in range(last_page):   # pagination added in a later step
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")

    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

    for result in results:
        job = extract_job(result)  # each result is one card's HTML subtree
        print(job)
        jobs.append(job)  # bug fix: jobs were printed but never collected,
                          # so the function always returned an empty list

    return jobs

 

 

 

 

 

 

 

 

 

 

 

def extract_indeed_jobs(last_page):
    """Scrape every Indeed result page and return the collected job dicts.

    last_page: number of result pages to fetch, as reported by
    extract_indeed_pages().
    """
    jobs = []

    # Bug fix: the pagination loop was commented out, so last_page was
    # ignored and only the first page (start=0) was ever requested.
    for page in range(last_page):
        result = requests.get(f"{URL}&start={page*LIMIT}")
        soup = BeautifulSoup(result.text, "html.parser")
        results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
        for result in results:
            jobs.append(extract_job(result))

    return jobs

 

 

main.py

 

from indeed import extract_indeed_pages, extract_indeed_jobs

# Highest page number shown in Indeed's pagination bar.
last_indeed_page = extract_indeed_pages()

# Fix: variable name was misspelled "indeed_jpbs" in both uses.
indeed_jobs = extract_indeed_jobs(last_indeed_page)

print(indeed_jobs)

 

 

 

 

 

#최종

 

import requests

from bs4 import BeautifulSoup

 

LIMIT = 50  # number of job postings requested per results page

URL = f"https://www.indeed.com/jobs?q=python&limit={LIMIT}"  # base search query for "python" jobs

 

def extract_indeed_pages():
    """Return the highest page number in Indeed's pagination bar.

    Fetches the first results page and parses the numeric pagination links.
    Returns 1 when no numeric links are present (single page of results);
    the original code raised IndexError in that case.
    """
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")

    pagination = soup.find("div", {"class": "pagination"})
    links = pagination.find_all('a')

    # The last anchor is the "Next" arrow, not a page number — skip it.
    pages = [int(link.string) for link in links[:-1]]

    # Robustness fix: with a single results page there are no numeric
    # links and pages[-1] would raise IndexError.
    if not pages:
        return 1

    return pages[-1]

 

def extract_job(html):
    """Extract title, company, location, and link from one Indeed card.

    html: a BeautifulSoup Tag for a single "jobsearch-SerpJobCard" div.
    Returns a dict with 'title', 'company', 'location', and 'link' keys.
    """
    # The anchor inside the title div carries the full job title attribute.
    title = html.find("div", {"class": "title"}).find("a")["title"]

    company = html.find("span", {"class": "company"})
    company_anchor = company.find("a")

    # Some cards wrap the company name in a link, others in plain text.
    # Fix: reuse the anchor we already looked up instead of calling
    # company.find("a") a second time.
    if company_anchor is not None:
        company = str(company_anchor.string)
    else:
        company = str(company.string)
    company = company.strip()

    # The location div stores the city/state in a data attribute.
    location = html.find("div", {"class": "recJobLoc"})["data-rc-loc"]

    # Indeed's per-posting id, used to build a direct view-job URL.
    job_id = html["data-jk"]

    return {
        'title': title,
        'company': company,
        'location': location,
        "link": f"https://www.indeed.com/viewjob?jk={job_id}",
    }



def extract_indeed_jobs(last_page):
    """Scrape every Indeed result page and return the collected job dicts.

    last_page: number of result pages to fetch, as reported by
    extract_indeed_pages().
    """
    jobs = []

    for page in range(last_page):
        # Fix: progress message was misspelled ("Scrapping pge{page}").
        print(f"Scraping page {page}")
        result = requests.get(f"{URL}&start={page*LIMIT}")
        soup = BeautifulSoup(result.text, "html.parser")

        results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

        for result in results:
            jobs.append(extract_job(result))

    return jobs

 



 

 

 



'Language > 파이썬' 카테고리의 다른 글

Building a Job Scrapper(4)  (0) 2020.01.04
Building a Job Scrapper(3)  (0) 2020.01.04
Building a Job Scrapper(2)  (0) 2020.01.04
Building a Job Scrapper  (0) 2020.01.02
Data Analysis/데이터로 그래프그리기  (0) 2018.01.05