-
Notifications
You must be signed in to change notification settings - Fork 0
/
spider.py
90 lines (74 loc) · 3.12 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import re
import time
import json
import requests
from urllib.parse import urlparse, urlunparse
from requests.compat import quote, urljoin
from PIL import Image
from io import BytesIO
class Spider(object):
    """Download unit icons and comic episodes over HTTP.

    Icons are fetched from ``redive.estertion.win`` and comics from the
    ``comic.priconne-redive.jp`` detail API. All methods report progress
    with ``print`` and let network / image-decoding exceptions propagate
    to the caller.
    """

    # Seconds to wait for a server before giving up. Without a timeout a
    # single stalled connection would block the whole crawl forever.
    REQUEST_TIMEOUT = 30

    def download_img(self, save_path, link):
        """Fetch ``link`` and, if the response is an image, save it.

        The response is saved only when the status code is 200 and the
        ``Content-Type`` header contains ``image`` (case-insensitive).
        Anything else is skipped after printing the status code.

        Args:
            save_path: Destination file path. The image is re-encoded by
                Pillow's ``Image.save``, which picks the output format
                from the file extension.
            link: URL of the image to download.

        Raises:
            requests.RequestException: On connection errors or timeouts.
            OSError: If the payload cannot be decoded or written by Pillow.
        """
        print('download_img from ', link, end=' ')
        # The context manager releases the streamed connection even when the
        # body is never read (non-200 or non-image responses).
        with requests.get(link, stream=True,
                          timeout=self.REQUEST_TIMEOUT) as resp:
            print('status_code=', resp.status_code, end=' ')
            if resp.status_code != 200:
                return
            # A missing Content-Type header is treated as "not an image"
            # instead of raising KeyError.
            content_type = resp.headers.get('content-type', '')
            if not re.search(r'image', content_type, re.I):
                return
            print(f'is image, saving to {save_path}', end=' ')
            payload = resp.content
        img = Image.open(BytesIO(payload))
        img.save(save_path)
        print('ok', end=' ')

    def download_icon_unit(self, start=1001, end=1300, star=3,
                           save_dir='./unit/'):
        """Download unit icons for ids in ``range(start, end)``.

        For each id the remote file ``<id:04d><star>1.webp`` is requested
        and stored as ``icon_unit_<id:04d><star>1.png`` inside ``save_dir``.
        The method pauses 0.5 seconds after every request.

        Args:
            start: First unit id (inclusive).
            end: Last unit id (exclusive).
            star: Star digit inserted into the file name.
            save_dir: Output directory; created if it does not exist.
        """
        base = 'https://redive.estertion.win/icon/unit/'
        os.makedirs(save_dir, exist_ok=True)

        for unit_id in range(start, end):
            # Shared stem: zero-padded id, star digit, then a literal "1".
            stem = f'{unit_id:0>4d}{star}1'
            src_n = f'{stem}.webp'
            dst_n = f'icon_unit_{stem}.png'
            self.download_img(os.path.join(save_dir, dst_n),
                              urljoin(base, src_n))
            time.sleep(0.5)
            print('\n', end='')

    def download_comic(self, start=1, end=200, only_index=False,
                       save_dir='./comic/'):
        """Build an index of comic episodes and optionally download them.

        Queries the detail API for every id in ``range(start, end)``. For
        each 200 response the first element of the JSON payload supplies
        ``episode_num``, ``title`` and ``cartoon`` (the image link). When
        the loop finishes, the collected index is written to
        ``<save_dir>/index.json`` as ``{episode: {title, link}}``.

        Args:
            start: First comic id (inclusive).
            end: Last comic id (exclusive).
            only_index: If true, only build ``index.json`` and skip the
                image downloads.
            save_dir: Output directory; created if it does not exist.

        Raises:
            requests.RequestException: On connection errors or timeouts.
        """
        base = 'https://comic.priconne-redive.jp/api/detail/'
        os.makedirs(save_dir, exist_ok=True)

        index = {}
        for i in range(start, end):
            print('getting comic', i, '...', end=' ')
            url = base + str(i)
            print('url=', url, end=' ')
            resp = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            print('status_code=', resp.status_code)
            if resp.status_code != 200:
                continue

            payload = resp.json()
            if not payload:
                # An empty list would make payload[0] raise IndexError.
                print('empty response body, skipping')
                continue
            data = payload[0]
            # NOTE: a filter on data['current_index'] used to be sketched
            # here; it is intentionally disabled, every entry is indexed.
            episode = data['episode_num']
            title = data['title']
            link = data['cartoon']
            index[episode] = {'title': title, 'link': link}
            print(index[episode])
            if not only_index:
                self.download_img(
                    os.path.join(save_dir, f'episode_{episode}.png'), link)
            time.sleep(0.1)
            print('\n', end='')

        index_path = os.path.join(save_dir, 'index.json')
        with open(index_path, 'w', encoding='utf8') as f:
            json.dump(index, f, ensure_ascii=False)
if __name__ == '__main__':
    # Script entry point: fetch the 3-star and 6-star icons for two id
    # ranges, in the order (range 1: 3, 6), then (range 2: 3, 6).
    spider = Spider()
    # Seeing both 404 and 200 status codes during the run is normal.
    id_ranges = ((1001, 1200), (1802, 1805))
    star_levels = (3, 6)
    for first_id, stop_id in id_ranges:
        for star_level in star_levels:
            spider.download_icon_unit(start=first_id, end=stop_id,
                                      star=star_level)
    #spider.download_icon_unit(start=1400, end=1805, star=3)
    # To crawl the comics as well, uncomment the line below.
    # spider.download_comic(start=1, end=200, only_index=False)