学习python也有十来天了,是时候找个东西练练手了
想来想去还是写爬虫比较实用,一开始写了个爬妹子图的(话说这网站是真的惨,每个刚开始学爬虫的都要拿它练练手)不过不小心手一抖给删了。
然后就萌生了爬pt站的想法。
代码如下:
import requests
import os
import re
import lxml
import bs4
import time
# Module-level accumulator of torrent id strings, filled by get_torrent_id()
# and consumed by download_torrent().
num = []
def login_info2(str2,username,password):
    """Drive the whole crawl: log in, fetch the passkey, then download torrents.

    str2     -- site base URL
    username -- account name
    password -- account password
    """
    session = login_info(str2, username, password)
    passkey = get_passkey(str2, session)
    print('你的passkey是:', passkey)
    print('你需要下载几页种子?(每页种子的个数见站点设置,第一页不包含置顶种子)')
    page_quantity = int(input())
    get_torrent_id(str2, session, page_quantity)
    download_torrent(str2, session, passkey)
def login_info(str2,username,password):
    """Return a requests session logged in to the site at `str2`.

    Fills in the module-level `header` and `form_data` templates for this
    site before posting the login form.
    """
    print(str2)
    header['origin'] = str2
    header['referer'] = str2 + '/login.php'
    form_data['username'] = username
    form_data['password'] = password
    session = requests.session()
    login_url = str2 + '/takelogin.php'
    session.post(login_url, data=form_data, headers=header)
    return session
def get_passkey(str2,s):
    """Scrape the user's 32-character passkey from the usercp page.

    Returns the first exactly-32-char lowercase-hex-like token found,
    or None if the page contains no such token.
    """
    page = s.get(str2 + '/usercp.php', headers=header)
    for candidate in re.findall(r'[a-z0-9]{32,50}', page.text):
        if len(candidate) == 32:
            return candidate
def get_torrent_id(str2,s,page_quantity):
    """Collect torrent ids from `page_quantity` listing pages into the
    module-level `num` list.

    Ourbits pages are parsed with BeautifulSoup; other sites fall back to
    raw-string scanning that first skips past the sticky ('置顶') torrents.
    """
    for i in range(page_quantity):
        temp = i  # remember the page index; page 0 gets special handling below
        small_torrent_url = str2 +'/torrents.php?inclbookmarked=0&incldead=1&spstate=0&&sort=5&type=asc&page=' + str(i)
        print(small_torrent_url)
        html = s.get(small_torrent_url,headers = header)
        if str2 =='https://ourbits.club':
            # Ourbits: torrent links live in <tr class="sticky_blank"> rows;
            # every second row holds one download link.
            soup = bs4.BeautifulSoup(html.content, 'lxml')
            a = soup.find_all('tr',class_='sticky_blank')
            for i in range(0,len(a),2):
                str1 = str(a[i])
                # Slice out the id between 'download.php?id=' and the closing quote.
                b = str1.find('download.php?id=') + 16
                c = str1.find('"',b)
                num.append(str1[b:c])
        else:
            # Generic sites: advance past every '置顶' (sticky) marker, counting them in x.
            x = 0
            str_html = html.text
            a = str_html.find('置顶') + 2
            str_html = str_html[a:]
            while(a!=1):
                # find() returns -1 when no marker remains, so a becomes 1 and the loop stops.
                a = str_html.find('置顶') + 2
                str_html = str_html[a:]
                x+=1
            else:
                # The final pass was the failed find, so x over-counts by one.
                if x!=0:
                    x = x-1
            # NOTE(review): assumes 50 torrents per page minus the stickies — confirm
            # against the site's per-page setting.
            for i in range(50-x):
                b = str_html.find('download.php?id=') + 16
                c = str_html.find('"',b)
                num.append(str_html[b:c])
                str_html = str_html[c:]
        if temp == 0:
            # NOTE(review): drops the first collected id on page 0 — presumably a
            # sticky/extra match; verify against the live page layout.
            del num[0]
def download_torrent(str2, s, passkey, save_dir='Z:\\torrent', delay=15):
    """Download every torrent id collected in the module-level `num` list.

    str2     -- site base URL
    s        -- logged-in requests session
    passkey  -- user's passkey, appended to each download URL
    save_dir -- directory the .torrent files are written to (new, defaults
                to the original hard-coded path)
    delay    -- seconds to wait between downloads (new, defaults to the
                original 15s throttle)

    Fix: os.mkdir crashed when the directory already existed; makedirs
    with exist_ok=True makes the function safely re-runnable.
    """
    os.makedirs(save_dir, exist_ok=True)
    os.chdir(save_dir)
    for torrent_id in num:
        download_torrent_url = str2 + '/download.php?id=' + torrent_id + '&passkey=' + passkey + '&https=1'
        print(download_torrent_url)
        torrent = s.get(download_torrent_url, headers=header)
        # Throttle: PT sites close the connection on too many rapid requests.
        time.sleep(delay)
        name = str(torrent_id) + '.torrent'
        with open(name, 'wb') as f:
            f.write(torrent.content)
# Supported sites: display name -> base URL.
www = {
    'Ourbits':'https://ourbits.club',
    '动漫花园u2':'https://u2.dmhy.org'
}
# Login form template; username/password are filled in by login_info().
form_data = {
    'username': '',
    'password': '',
    '2fa_code': '',
    'trackerssl': 'yes',
}
# Shared request headers; cookie/origin/referer are filled in at runtime.
header = {
    'cookie':'',
    'dnt': '1',
    'origin': '',
    'referer': '',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}
def login():
    """Prompt for a site choice and credentials, then start the crawl.

    Fixes: the local variable `num` shadowed the module-level torrent-id
    list, and the two site branches duplicated every credential prompt.
    Retries (recursively) on an invalid menu choice.
    """
    print('输入要登陆的站点:\n1.Ourbits\n2.动漫花园u2')
    choice = input()  # renamed from `num` to avoid shadowing the module-level list
    print('等待一下,时间视网速而定...')
    sites = {'1': 'Ourbits', '2': '动漫花园u2'}
    if choice not in sites:
        print('你要输入一个正确的选项')
        login()
        return
    print('输入账号:')
    username = input()
    print('输入密码:')
    password = input()
    print('输入cookie:')
    header['cookie'] = input()
    login_info2(www[sites[choice]], username, password)
# Script entry point: start the interactive login/crawl flow.
login()
支持ourbits和u2,馒头其实也可以不过得把get_torrent_id函数里的"置顶"换成繁体的,然后改一下相关链接就ok了。默认保存在Z盘的torrent文件夹(运行程序之前不能有该文件夹,否则会报错),设置的是每15秒下载一个种子,因为有些PT站(比如ourbits,u2在短时间内请求次数过多也会被关闭连接)限制了每分钟能够下载的种子数。