# Fetch an OSChina user page and print the title and link of each entry in
# its "newestBlogList" container.
# NOTE(review): HTMLSession is not defined in this snippet; the
# `r.html.xpath(..., first=True)` usage matches the requests_html API, so it
# is presumably imported from requests_html elsewhere in the file -- confirm.
session = HTMLSession()
url = "https://my.oschina.net/u/4798232"
r = session.get(url)

# Marker text removed from titles before printing (presumably the
# "original post" badge rendered inside the anchor -- verify against the page).
a = "原"

# XPath positions are 1-based; this probes entries 1 through 11.
for i in range(1, 12):
    url_xpath = f'//div[@id="newestBlogList"]/div[1]/div[{i}]/div/a'
    anchor = r.html.xpath(url_xpath, first=True)
    if anchor is None:
        # With first=True a missing node yields None; skip positions that do
        # not hold a post instead of failing on `.text`.
        continue

    title = anchor.text
    link_xpath = url_xpath + '/@href'
    link = r.html.xpath(link_xpath, first=True)

    # str.replace leaves the title untouched when the marker is absent, so a
    # separate "marker not present" branch is unnecessary.
    print(title.replace(a, ""))
    print(link)
<!-- Screen-only stylesheets selected by viewport width via media queries:
     public2.css applies at widths up to 1400px, public3.css at widths up to 1000px. -->
<link rel="stylesheet" media="screen and (max-width:1400px)" href="./css/public2.css">
<link rel="stylesheet" media="screen and (max-width:1000px)" href="./css/public3.css">