# Algorithm example: batch extraction of URLs (算法实例:批量提取网址)
import time
import requests
import os
from bs4 import BeautifulSoup
import re
import numpy as np
from time import *
# Collect every data-link URL from the numbered page dumps
# "3D-Models-<k>.txt" and append them, one per line, to a single
# combined link file, then report the elapsed wall-clock time.

# NOTE: `time` here is the function brought in by `from time import *`
# (which rebinds the name after `import time`), not the module.
begin_time = time()

FIRST_PAGE = 1
LAST_PAGE = 1834  # inclusive upper bound of the numbered input files
OUTPUT_PATH = "000-3D-Models-Link-total.txt"

# Non-greedy capture: with several links on one line each is matched on
# its own instead of being swallowed into one over-long match.
LINK_PATTERN = re.compile(r'data-link="(.*?)" data-price=')


def _extract_links(lines):
    """Return every data-link value found in *lines*, in order.

    Lines that mention ``data-link`` without the full
    ``data-link="..." data-price=`` shape simply contribute nothing.
    """
    links = []
    for line in lines:
        links.extend(LINK_PATTERN.findall(line))
    return links


for k in range(FIRST_PAGE, LAST_PAGE + 1):
    source_path = '3D-Models-' + str(k) + '.txt'
    # The context manager closes each input file; iterating the handle
    # scans every line (including the first) up to the real end of file.
    with open(source_path, "r", encoding='GBK', errors='ignore') as page:
        txt = _extract_links(page)
    print(k, len(txt))
    if txt:
        # Open the output once per input file rather than once per link.
        with open(OUTPUT_PATH, "a") as out:
            out.writelines(link + '\n' for link in txt)

end_time = time()
run_time = end_time - begin_time
print('该循环程序运行时间:', run_time)  # elapsed time of the whole loop