본문 바로가기

Language/파이썬

Building a Job Scrapper(4)

 

 

 

#2.7 Extracting Companies

 

 

 

 

#company 출력하기

 

def extract_indeed_jobs(last_page):
    """Print the company element of every job card on the first results page.

    Paging is switched off at this stage: ``last_page`` is accepted but not
    used, and only the page at offset ``0 * LIMIT`` is requested.

    Returns:
        The ``jobs`` list, which is still empty because nothing is appended
        to it yet.
    """
    jobs = []

    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")

    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

    for result in results:
        title = result.find("div", {"class": "title"}).find("a")["title"]

        # find("a") yields None when the company span holds no link, so this
        # step prints None for those cards.
        company = result.find("span", {"class": "company"}).find("a")

        print(company)

    return jobs

 

company = result.find( "span" , { "class" : "company" }).find( "a" )

 

 

 

 

 

 

 

#링크가 있는 회사도 있고 없는 회사도 있다. 링크가 없으면 find("a")가 None을 반환하므로, 이제 if문으로 경우를 나눠야 한다^^

def extract_indeed_jobs(last_page):
    """First attempt at printing company names with or without a link.

    NOTE: this listing is the failing intermediate step. ``company`` is
    already the result of ``.find("a")``, so it is None for a company span
    without a link, and the ``company.find("a")`` check below then raises
    AttributeError. Deleting the trailing ``.find("a")`` from the ``company``
    assignment removes the error.

    Paging is switched off: ``last_page`` is unused and only the page at
    offset ``0 * LIMIT`` is requested.

    Returns:
        The ``jobs`` list, which is still empty because nothing is appended
        to it yet.
    """
    jobs = []

    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")

    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

    for result in results:
        title = result.find("div", {"class": "title"}).find("a")["title"]

        # The trailing .find("a") has to be deleted to avoid the error.
        company = result.find("span", {"class": "company"}).find("a")

        if company.find("a") is not None:
            print(company.find("a").string)
        else:
            print(company.string)

    return jobs

 

<참고: 에러 발생. company를 구하는 줄 끝의 .find("a")를 없애주어야 함>

 

 

 

 

 

 

#다시 수정: 앵커(링크)가 있으면 앵커에서, 없으면 span에서 회사 이름을 가져온다. 앵커가 없는 회사에서도 None으로 인한 에러가 발생하지 않음

 

def extract_indeed_jobs(last_page):
    """Print the company name of every job card on the first results page.

    The name is read from the link inside the company span when there is
    one, and from the span itself otherwise.

    Paging is switched off: ``last_page`` is unused and only the page at
    offset ``0 * LIMIT`` is requested.

    Returns:
        The ``jobs`` list, which is still empty because nothing is appended
        to it yet.
    """
    jobs = []

    # for page in range(last_page):
    result = requests.get(f"{URL}&start={0*LIMIT}")
    soup = BeautifulSoup(result.text, "html.parser")

    results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

    for result in results:
        title = result.find("div", {"class": "title"}).find("a")["title"]

        # Keep the span itself so the no-link case can still be handled.
        company = result.find("span", {"class": "company"})

        if company.find("a") is not None:
            print(company.find("a").string)
        else:
            print(company.string)

    return jobs

 



 

#if문 코드를 간단하게 수정

# Excerpt from the body of extract_indeed_jobs: the per-card loop.
for result in results:
    title = result.find("div", {"class": "title"}).find("a")["title"]

    company = result.find("span", {"class": "company"})

    company_anchor = company.find("a")  # changed: look the link up once

    if company_anchor is not None:  # changed: reuse the stored lookup
        print(company_anchor.string)  # changed
    else:
        print(company.string)

 

return jobs

 

<출력: ok>

 

 

#빈곳이 나오지 않도록 수정

# Excerpt from the per-card loop: each name is wrapped in str() before printing.
if company_anchor is not None:  # reuse the lookup stored in company_anchor
    print(str(company_anchor.string))
else:
    print(str(company.string))



return jobs

 

 

 

# 회사 이름 앞뒤 공백 정리 (strip)

# Excerpt from the per-card loop: pick the company name, then trim it.
if company_anchor is not None:  # reuse the lookup stored in company_anchor
    company = str(company_anchor.string)
else:
    company = str(company.string)
# strip() applies to both branches and removes surrounding whitespace.
company = company.strip()
print(company)

 

return jobs

 

 

 

#타이틀도 출력

# Failing attempt, kept as written: both str(...) calls end with one closing
# parenthesis too many, and the last line has a stray "*" after print.
if company.find("a") is not None:

company = str(company_anchor.string))  # extra ")": reduce the two to one

else:

company = str(company.string))   # extra ")": reduce the two to one

company = company.strip()

print*(title,company)  # stray "*": the corrected listing uses print(title,company)

 

return jobs

<참고: 에러 — 닫는 괄호 ))가 하나씩 더 많고, print* 는 print 의 오타>

 

 

 

 

#다시 

# Excerpt from the per-card loop: corrected version that prints title and company.
if company_anchor is not None:  # reuse the lookup stored in company_anchor
    company = str(company_anchor.string)
else:
    company = str(company.string)
# strip() applies to both branches and removes surrounding whitespace.
company = company.strip()
print(title, company)

 

return jobs

 

print (title,company)

 

 

 

 

 

 

 

 

 

 

 

'Language > 파이썬' 카테고리의 다른 글

Building a Job Scrapper(5)  (0) 2020.01.04
Building a Job Scrapper(3)  (0) 2020.01.04
Building a Job Scrapper(2)  (0) 2020.01.04
Building a Job Scrapper  (0) 2020.01.02
Data Analysis/데이터로 그래프그리기  (0) 2018.01.05