언어/Python
Python etc2
hvoon
2022. 9. 7. 16:22
1. 링크 넣고 제목 따오기
import requests
from bs4 import BeautifulSoup
# Download the Daum entertainment popularity ranking page and parse it.
ranking_url = 'https://entertain.daum.net/ranking/popular'
response = requests.get(ranking_url)
soup = BeautifulSoup(response.text, "html.parser")
# Print the text of the first <a class="link_txt"> anchor found on the page.
first_link = soup.find('a', {'class': 'link_txt'})
print(first_link.text)
2. 링크 넣고 제목 다 따오기
import requests
from bs4 import BeautifulSoup
# Download the Daum entertainment popularity ranking page and parse it.
page = requests.get('https://entertain.daum.net/ranking/popular').text
soup = BeautifulSoup(page, "html.parser")
# Collect every <a class="link_txt"> anchor (find_all is the current name
# of the deprecated findAll alias).
links = soup.find_all('a', {'class': 'link_txt'})
for link in links:
    # The loop body must be indented; written flush-left it is an
    # IndentationError.
    print(link.text)
3. 순위권 안에 제목 가져오기
import requests
from bs4 import BeautifulSoup
# Download the Daum entertainment popularity ranking page and parse it.
page = requests.get('https://entertain.daum.net/ranking/popular').text
soup = BeautifulSoup(page, "html.parser")
# Collect every <strong class="tit_thumb"> element (find_all is the current
# name of the deprecated findAll alias).
headlines = soup.find_all('strong', {'class': 'tit_thumb'})
for headline in headlines:
    # The loop body must be indented; written flush-left it is an
    # IndentationError.
    print(headline.text)
4. 순위권 안에 들어가 제목과 본문 가져오기
import requests
from bs4 import BeautifulSoup
# Download the ranking page, then open every ranked article and print its
# title and body.
ranking_page = requests.get('https://entertain.daum.net/ranking/popular').text
ranking = BeautifulSoup(ranking_page, "html.parser")
headlines = ranking.find_all('strong', {'class': 'tit_thumb'})
for headline in headlines:
    # The first <a> inside each headline carries the article URL in its href.
    article_page = requests.get(headline.find('a')['href']).text
    article = BeautifulSoup(article_page, "html.parser")
    # Separate names are used for the article so the loop does not rebind
    # the ranking soup or the list it is iterating over.
    title = article.find('h3', {'class': 'tit_view'})
    content = article.find('div', {'class': 'article_view'})
    print(title.text)  # print to the screen
    print(content.text)
5. 순위권 안에 들어가 제목과 본문 파일에 저장하기
import requests
from bs4 import BeautifulSoup
# Download the ranking page, then save every ranked article's title and body
# to news__<n>.txt, where <n> is the 1-based position in the ranking list.
ranking_page = requests.get('https://entertain.daum.net/ranking/popular').text
ranking = BeautifulSoup(ranking_page, "html.parser")
headlines = ranking.find_all('strong', {'class': 'tit_thumb'})
# enumerate replaces the hand-maintained counter, which was defined as
# "nember" but incremented as "number" (NameError on the first iteration).
for number, headline in enumerate(headlines, start=1):
    # The first <a> inside each headline carries the article URL in its href.
    article_page = requests.get(headline.find('a')['href']).text
    article = BeautifulSoup(article_page, "html.parser")
    title_text = article.find('h3', {'class': 'tit_view'}).text
    body_text = article.find('div', {'class': 'article_view'}).text
    # The file is opened only after the article was fetched and parsed, so a
    # failed download does not leave an empty file behind; "with" closes the
    # file even if a write raises.
    with open("news__" + str(number) + '.txt', 'w', encoding='UTF-8') as out:
        out.write(title_text)  # write to the file
        out.write(body_text)
6. 날짜마다 순위권 기사의 제목과 본문 파일에 저장하기
import requests
from bs4 import BeautifulSoup
# For every day of October 2021, download that day's ranking page and save
# each listed article's title and body to <date>__<n>.txt.
# range() excludes its stop value, so 20211032 is needed to include the 31st
# (the original stop of 20211031 ended at the 30th). This integer trick is
# only valid within a single month.
for date in range(20211001, 20211032):
    ranking_page = requests.get('https://entertain.daum.net/ranking/popular?date=' + str(date)).text
    ranking = BeautifulSoup(ranking_page, "html.parser")
    entries = ranking.find_all('div', {'class': 'inner_cont2'})
    # enumerate restarts at 1 for every date and replaces the hand-maintained
    # counter, which was defined as "nember" but incremented as "number"
    # (NameError on the first iteration).
    for number, entry in enumerate(entries, start=1):
        # The first <a> inside each entry carries the article URL in its href.
        article_page = requests.get(entry.find('a')['href']).text
        article = BeautifulSoup(article_page, "html.parser")
        title_text = article.find('h3', {'class': 'tit_view'}).text
        body_text = article.find('div', {'class': 'article_view'}).text
        # The file is opened only after the article was fetched and parsed,
        # so a failed download does not leave an empty file behind; "with"
        # closes the file even if a write raises.
        with open(str(date) + '__' + str(number) + '.txt', 'w', encoding='UTF-8') as out:
            out.write(title_text)  # write to the file
            out.write(body_text)