隐约雷鸣,阴霾天空。

使用python批量下载PT站小种

学习python也有十来天了,是时候找个东西练练手了
想来想去还是写爬虫比较实用,一开始写了个爬妹子图的(话说这网站是真的惨,每个刚开始学爬虫的都要拿它练练手)不过不小心手一抖给删了。
然后就萌生了爬pt站的想法。

代码如下:

import requests
import os
import re
import lxml
import bs4
import time
# Shared module-level list of torrent ids (strings): get_torrent_id() appends
# to it and download_torrent() iterates over it.
num = []
def login_info2(str2, username, password):
    """Run one complete session against the site whose base URL is ``str2``.

    Logs in, looks up and prints the account passkey, asks on stdin how many
    listing pages to scan, collects the torrent ids from those pages and
    finally downloads every collected torrent.

    Args:
        str2: Base URL of the site.
        username: Account name sent with the login form.
        password: Account password sent with the login form.

    Raises:
        ValueError: If the page count typed by the user is not an integer.
    """
    session = login_info(str2, username, password)
    passkey = get_passkey(str2, session)
    print('你的passkey是:', passkey)

    print('你需要下载几页种子?(每页种子的个数见站点设置,第一页不包含置顶种子)')
    page_quantity = int(input())

    get_torrent_id(str2, session, page_quantity)
    download_torrent(str2, session, passkey)

def login_info(str2, username, password):
    """Post the login form to the site at ``str2`` and return the session.

    Side effects: prints ``str2`` and overwrites the ``origin``/``referer``
    entries of the module-level ``header`` dict and the
    ``username``/``password`` entries of the module-level ``form_data`` dict.
    The response of the login request is not inspected.

    Args:
        str2: Base URL of the site.
        username: Account name placed into the form body.
        password: Account password placed into the form body.

    Returns:
        The ``requests`` session the login request was sent through.
    """
    print(str2)
    header.update({'origin': str2, 'referer': str2 + '/login.php'})
    form_data.update({'username': username, 'password': password})

    session = requests.session()
    session.post(str2 + '/takelogin.php', data=form_data, headers=header)
    return session

def get_passkey(str2, s):
    """Fetch the user control panel page and extract a passkey from it.

    The page text is scanned for runs of 32 to 50 lowercase letters/digits;
    the first run that is exactly 32 characters long is returned.

    Args:
        str2: Base URL of the site.
        s: Session object used for the GET request.

    Returns:
        The first 32-character match, or ``None`` when there is none.
    """
    response = s.get(str2 + '/usercp.php', headers=header)
    candidates = re.findall(r'[a-z0-9]{32,50}', response.text)
    return next((token for token in candidates if len(token) == 32), None)
def get_torrent_id(str2,s,page_quantity):
    """Scrape torrent ids from the first ``page_quantity`` listing pages.

    Every id found is appended (as a string) to the module-level ``num``
    list; nothing is returned. Pages are requested through session ``s``
    with the module-level ``header`` dict, using 0-based ``page`` numbers.

    Two extraction strategies are used, chosen by the base URL ``str2``:
    an HTML-parser based one for ``https://ourbits.club`` and a plain
    string-slicing one for every other site.
    """

    for i in range(page_quantity):
        # The inner loops below reuse the name ``i``; keep the page index safe.
        temp = i
        small_torrent_url = str2 +'/torrents.php?inclbookmarked=0&incldead=1&spstate=0&&sort=5&type=asc&page=' + str(i)
        print(small_torrent_url)
        html = s.get(small_torrent_url,headers = header)
        if str2 =='https://ourbits.club':
            soup = bs4.BeautifulSoup(html.content, 'lxml')
            # NOTE(review): presumably 'sticky_blank' marks the non-sticky rows
            # and each torrent yields two matching <tr> elements, hence the
            # step of 2 below -- confirm against the site's markup.
            a =  soup.find_all('tr',class_='sticky_blank')
            for i in range(0,len(a),2):
                str1 = str(a[i])
                # 16 == len('download.php?id='): b is the index of the id's first char.
                b = str1.find('download.php?id=') + 16
                # The id runs up to the next double quote.
                c = str1.find('"',b)
                num.append(str1[b:c])
        else:
            # x counts iterations of the marker-skipping loop below.
            x = 0
            str_html = html.text
            # str.find() returns -1 on a miss, so ``a`` becomes 1 exactly when
            # no further '置顶' marker exists; 1 is the loop's stop sentinel.
            a = str_html.find('置顶') + 2
            str_html = str_html[a:]
            while(a!=1):
                # Drop everything up to and including the next '置顶' marker.
                # On the final (miss) pass this slices off a single character
                # and still increments x.
                a = str_html.find('置顶') + 2
                str_html = str_html[a:]
                x+=1
            else:
                # The loop has no ``break``, so this suite always runs.
                # NOTE(review): the decrement looks like compensation for the
                # extra increment on the miss pass -- confirm the intended count.
                if x!=0:
                    x = x-1
                # NOTE(review): 50 is presumably the site's torrents-per-page
                # setting -- confirm; there is no check that a link was found
                # (a miss makes b == 15 and appends an arbitrary slice).
                for i in range(50-x):
                    b = str_html.find('download.php?id=') + 16
                    c = str_html.find('"',b)
                    num.append(str_html[b:c])
                    # Continue searching after the id just consumed.
                    str_html = str_html[c:]
                # NOTE(review): on the first page the first entry of ``num`` is
                # discarded -- presumably the link of the last sticky torrent;
                # verify this is still right when the page has no sticky rows.
                if temp == 0:
                    del num[0]


def download_torrent(str2, s, passkey, save_dir='Z:\\torrent', delay=15):
    """Download every torrent id in the module-level ``num`` list.

    Each torrent is fetched from ``<str2>/download.php`` through session ``s``
    and written to ``<save_dir>/<id>.torrent``.

    Args:
        str2: Base URL of the site.
        s: Logged-in session used for the GET requests.
        passkey: Account passkey appended to every download URL.
        save_dir: Target folder; defaults to the original hard-coded path.
        delay: Seconds to wait after each download, to stay under the
            per-minute download limit some sites enforce.

    Side effects:
        Creates ``save_dir`` if needed and makes it the current working
        directory of the process (kept from the original behaviour).
    """
    # makedirs(exist_ok=True) instead of mkdir: a second run must not crash
    # with FileExistsError just because the folder is already there.
    os.makedirs(save_dir, exist_ok=True)
    os.chdir(save_dir)
    for torrent_id in num:
        download_torrent_url = str2 + '/download.php?id=' + torrent_id + '&passkey=' + passkey + '&https=1'
        print(download_torrent_url)
        torrent = s.get(download_torrent_url, headers=header)
        name = str(torrent_id) + '.torrent'
        # Persist the payload before throttling so an interrupt during the
        # wait cannot lose a torrent that was already fetched.
        with open(name, 'wb') as f:
            f.write(torrent.content)
        time.sleep(delay)

# Supported sites: display name -> base URL (no trailing slash).
www = {
    'Ourbits': 'https://ourbits.club',
    '动漫花园u2': 'https://u2.dmhy.org',
}

# Body of the login POST; username/password are filled in by login_info().
form_data = {
    'username': '',
    'password': '',
    '2fa_code': '',
    'trackerssl': 'yes',
}

# Request headers sent with every call; cookie/origin/referer start empty
# and are filled in at runtime by login() and login_info().
header = {
    'cookie': '',
    'dnt': '1',
    'origin': '',
    'referer': '',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}

def login():
    """Interactive entry point: pick a site, read credentials, run a session.

    Keeps asking until a valid site number is entered, then reads the
    username, password and cookie from stdin, stores the cookie in the
    module-level ``header`` dict and hands over to ``login_info2``.
    Returns once that single session has finished.
    """
    # Menu choice -> key in the module-level ``www`` site table.
    sites = {'1': 'Ourbits', '2': '动漫花园u2'}

    # Loop (rather than recurse) until the choice is valid. The original
    # called login() again unconditionally, which restarted the whole flow
    # even after a successful run.
    while True:
        print('输入要登陆的站点:\n1.Ourbits\n2.动漫花园u2')
        choice = input()
        if choice in sites:
            break
        print('你要输入一个正确的选项')

    print('输入账号:')
    username = input()
    print('输入密码:')
    password = input()
    print('输入cookie:')
    header['cookie'] = input()

    # Announce the wait right before the network work actually starts.
    print('等待一下,时间视网速而定...')
    login_info2(www[sites[choice]], username, password)
# Start the interactive flow only when executed as a script, so that
# importing this module does not block on input().
if __name__ == "__main__":
    login()

支持ourbits和u2。馒头其实也可以,不过得把get_torrent_id函数里的"置顶"换成繁体的,然后改一下相关链接就ok了。种子默认保存在Z盘的torrent文件夹(运行程序之前不能有该文件夹,否则会报错)。程序设置为每15秒下载一个种子,因为有些PT站(比如ourbits)限制了每分钟能够下载的种子数,u2在短时间内请求次数过多也会被关闭连接。

添加新评论