更新时间2021-07-11 09:05:21
"""Scrape a novel ranking list and save it as an .xls spreadsheet.

Each ranking page is fetched with ``requests``, parsed with BeautifulSoup,
and the five fields of every entry (type, name, latest chapter, author,
update time) are written as one spreadsheet row via ``xlwt``.
"""
import requests
import bs4
import xlwt
from time import sleep

# The original header was mangled with stray spaces ('user - agent',
# 'Mozilla / 5.0(WindowsNT10.0;...'), which is not a valid User-Agent header.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.116 Safari/537.36'
    )
}

# CSS selectors for the five fields of one list entry, in output-column order:
# novel type, novel name, latest chapter, author, update time.
_FIELD_SELECTORS = ('.s1', '.s2 a', '.s3 a', '.s4', '.s5')


def get_content(url):
    """Fetch one ranking page and return its entries.

    Args:
        url: Address of the ranking page to download.

    Returns:
        A list of ``[novel_type, novel_name, latest_chapters, author,
        update_time]`` lists, one per ``.novelslistss li`` element among the
        first 50 that contains all five fields. A field is ``None`` when the
        matched element has no single string child.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    res = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page into an empty result.
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    list_all = []
    for novel in soup.select('.novelslistss li')[:50]:
        cells = [novel.select_one(selector) for selector in _FIELD_SELECTORS]
        # Skip items (e.g. a header row) that lack any expected field rather
        # than crashing with IndexError.
        if any(cell is None for cell in cells):
            continue
        list_all.append([cell.string for cell in cells])
    return list_all


def main(path='D:/笔趣阁目录.xls', pages=range(1, 5)):
    """Download the ranking pages and write them to a spreadsheet.

    Args:
        path: Destination of the .xls file.
        pages: Iterable of page numbers to download; adjust it to scrape
            more or fewer pages.
    """
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok=True lets a cell be written more than once.
    worksheet = workbook.add_sheet('小说目录', cell_overwrite_ok=True)

    # Header row with the column titles.
    col = ('小说类型', '小说名', '最新章节', '作者', '最新更新时间')
    for column, title in enumerate(col):
        worksheet.write(0, column, title)

    # A running row counter keeps the sheet gap-free even when a page yields
    # fewer than 50 entries.
    row = 1
    for page in pages:
        print('正在下载第{}页目录=====> 请稍后'.format(page))
        # NOTE(review): the host contains a literal '***' (presumably redacted
        # in the published source); fill in the real domain before running.
        url = 'https://www.***.net/top/allvisit/{}.html'.format(page)
        for record in get_content(url):
            for column, value in enumerate(record):
                worksheet.write(row, column, value)
            row += 1
        # Throttle between HTTP requests to avoid hammering the server.
        sleep(0.5)

    workbook.save(path)
    print('所检索所有页面目录=======> 全部保存成功!')


if __name__ == '__main__':
    main()

# Original article: https://blog.csdn.net/qq_44921056/article/details/113832307
上一篇:关于物联网应用技术是否值得选择
下一篇:七星送子,惹人醉。是什么意思?