Python | 下载你的小说

伊剑 · 发表于 2023-3-23 14:39:24

没网的时候想看小说，咱就只能下载下来，那咱从《笔趣阁》上面扒一扒吧。

思路

咱先要知道小说的地址
创建一个用于存放本小说的文件夹
获取小说的所有章节
获取每章小说的内容
写入小说

代码

import requests
from bs4 import BeautifulSoup
import random
import os

# Index page of the novel. It must end with "/": the book id is taken from
# the second-to-last path segment, and chapter file names are appended
# directly to this URL when fetching chapters.
url = "http://www.ibiqu.org/book/38857/"

# URL split on "/". With the trailing slash above, folder[-2] is the book id
# ("38857"), which is used as the name of the download folder.
folder = url.split('/')

# Pool of User-Agent strings to pick from.
user_agent = ["Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1 ",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0 ",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50 ",
            "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50 ",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3) ",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0) ",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12 ",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) ",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0 ",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) ",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201 ",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E) QQBrowser/6.9.11079.201 ",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0) "
            ]

# Request headers with one User-Agent picked at random, once, at start-up.
# random.choice keeps working if entries are added to or removed from the
# list, unlike a hard-coded index range.
headers = {
    'User-Agent': random.choice(user_agent)
}

# Create the download folder, then start the download
def mkdir():
    """Ensure the download folder exists, then start fetching chapters.

    The folder name is the second-to-last "/"-separated segment of the
    module-level ``url`` (``folder[-2]``). After the folder is in place,
    ``run()`` is called to download the book.

    Raises:
        FileExistsError: if a non-directory entry with that name already
            exists (raised by ``os.mkdir``).
    """
    mkdir_url = folder[-2]
    # isdir rather than exists: a regular file with the same name must not be
    # reported as an existing folder, since later writes into it would fail.
    if os.path.isdir(mkdir_url):
        print(f'{mkdir_url}文件夹已存在')
    else:
        os.mkdir(mkdir_url)
        print(f'{mkdir_url}已创建')
    run()

# Fetch the chapter list
def run():
    """Download the book's index page and fetch every chapter it links to.

    Requests the module-level ``url`` with ``headers``, decodes the response
    as GBK, collects the ``<a>`` elements and calls ``cab(href, title)`` for
    each selected link, where ``href`` is the last "/"-separated segment of
    the link target and ``title`` is the link text.
    """
    print('正在获取书籍章节')
    res = requests.get(url, headers=headers, timeout=60)
    res.encoding = 'gbk'
    if res.status_code != 200:
        # Report the failure instead of returning silently with nothing saved.
        print(f'获取书籍章节失败，状态码：{res.status_code}')
        return

    soup = BeautifulSoup(res.text, 'lxml')
    links = soup.find_all('a')
    # NOTE(review): the 38 / -10 offsets are hard-coded; presumably they skip
    # navigation links before the chapter list and footer links after it.
    # Verify against the current page layout.
    for link in links[38:len(links) - 10]:
        href = link.get('href')
        if not href:
            # Anchors without a target would crash on .split(); skip them.
            continue
        cab(href.split('/')[-1], link.text)

# Fetch the content of one chapter
def cab(href, title):
    """Download one chapter page and save its text.

    Args:
        href: Chapter page file name; appended to the module-level ``url``.
        title: Chapter title; used in progress messages and passed to
            ``write`` as the file name.

    The text of the first ``<div id="content">`` element is passed to
    ``write``. If the request does not return 200, or the page has no such
    element, a message is printed and the chapter is skipped.
    """
    print(f'正在获取{title}内容')
    res = requests.get(f'{url}{href}', headers=headers, timeout=60)
    res.encoding = 'gbk'
    if res.status_code != 200:
        print(f'获取{title}失败，状态码：{res.status_code}')
        return

    soup = BeautifulSoup(res.text, 'lxml')
    content = soup.find('div', id='content')
    if content is None:
        # Indexing an empty find_all() result would raise IndexError and
        # abort the whole download; skip this chapter instead.
        print(f'未找到{title}的正文内容')
        return
    write(content.text, title)

# Save one chapter to disk
def write(content, title):
    """Append a chapter's text to ``./<folder[-2]>/<title>.txt`` as UTF-8.

    Args:
        content: Chapter text; converted with ``str()`` before writing.
        title: Chapter title, used as the file name. Characters that are
            not allowed in file names are replaced with "_".

    Raises:
        OSError: if the file cannot be opened or written.
    """
    # A raw title containing "/" or "\" would be read as a sub-directory, and
    # the other characters are rejected in file names on Windows.
    invalid_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    safe_title = str(title).translate(invalid_chars).strip()

    # "with" closes the file even if the write raises.
    with open(f'./{folder[-2]}/{safe_title}.txt', 'a', encoding='utf-8') as fs:
        fs.write(str(content))
    print(f"已保存{title}")

# Script entry point: mkdir() creates the download folder and then calls
# run() to fetch the chapters.
mkdir()
结论

下载的过程

下载的结果

		自动登录	找回密码
密码			立即注册

Python | 下载你的小说

浏览过的版块