算法实例:批量提取网址

import time

import requests

import os

from bs4 import BeautifulSoup

import re

import numpy as np

from time import *

# Batch link extraction.
#
# For every input file 3D-Models-1.txt ... 3D-Models-1834.txt, collect the
# value of each data-link attribute and append the values, one per line, to
# 000-3D-Models-Link-total.txt. The total elapsed time is printed at the end.

# Captures the data-link value on lines that also carry a data-price
# attribute. The pattern (including its greedy group) is unchanged.
LINK_PATTERN = re.compile('data-link="(.*)" data-price=')

# Maximum number of lines examined per input file, counted after the first
# line (which is skipped). Same window the original fixed-count loop covered.
MAX_SCANNED_LINES = 39000


def collect_links(path):
    """Return the data-link values found in the text file at *path*.

    The file is decoded as GBK with undecodable bytes ignored. The first
    line is skipped and at most MAX_SCANNED_LINES following lines are
    examined; scanning stops early at end of file. Lines that mention
    ``data-link`` without matching the full pattern are skipped instead of
    raising IndexError.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    links = []
    # The context manager guarantees the handle is closed for every file.
    with open(path, "r", encoding='GBK', errors='ignore') as source:
        next(source, None)  # the first line is never inspected
        for index, line in enumerate(source):
            if index >= MAX_SCANNED_LINES:
                break
            match = LINK_PATTERN.search(line)
            if match is not None:
                links.append(match.group(1))
    return links


# `time` is the function brought in by the file's `from time import *`.
begin_time = time()

for k in range(1, 1835):
    links = collect_links('3D-Models-' + str(k) + '.txt')

    # Progress report: file index and number of links found in it.
    print(k, len(links))

    # Open the output once per input file, and only when there is something
    # to write, so an input without links never creates or touches it.
    # NOTE(review): the output uses the platform default encoding, exactly
    # as before; confirm whether an explicit encoding is wanted.
    if links:
        with open("000-3D-Models-Link-total.txt", "a") as out:
            out.writelines(link + '\n' for link in links)

end_time = time()

run_time = end_time - begin_time

print ('该循环程序运行时间:',run_time)  # elapsed time of the whole run, in seconds

(0)

相关推荐