# 本文共 933 字，大约阅读时间需要 3 分钟。
import requests
from lxml import etree
def get_page(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, or the request itself failed).
    """
    # A browser-like User-Agent is sent with the request instead of the
    # default python-requests one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    }
    try:
        res = requests.get(url=url, headers=headers, timeout=timeout)
        # Force UTF-8 rather than relying on the encoding guessed by requests.
        res.encoding = 'utf-8'
        if res.status_code == 200:
            return res.text
        return None
    except requests.RequestException:
        # Connection errors, timeouts, malformed URLs, etc.: best-effort
        # contract is to signal failure with None rather than raise.
        return None
if __name__ == '__main__':
    # Demo: fetch a listing page and print the link(s) found in each entry.
    url = 'https://www.*****.com/case/'
    res = get_page(url)
    if not res:
        # get_page returns None on failure; etree.HTML cannot parse that.
        raise SystemExit(f'Failed to fetch {url}')
    tree = etree.HTML(res)

    # Every <div> that is a direct child of the element with id="case_list".
    cons = tree.xpath('//div[@id="case_list"]/div')
    # Only the first such <div> (XPath positions are 1-based).
    first_con = tree.xpath('//div[@id="case_list"]/div[1]')
    # The href attribute value(s) of <a> under div/div[1] of case_list.
    first_href = tree.xpath('//div[@id="case_list"]/div[1]/div/a/@href')

    for con in cons:
        # A leading "./" makes the path relative to the current element.
        href = con.xpath('./div/a/@href')
        print(href)
# 转载地址：http://zwclf.baihongyu.com/