?? htmlpa2.py
字號:
'''
note: this author is netspider_2007 of nxu.
welcome to my blog: netspider_2007
'''
import re
import urllib
import datetime
import os
def html2table(html):
    """Extract table cells from an HTML string.

    Every ``<tr>...</tr>`` span in *html* is located and, inside each span,
    every ``<td>...</td>`` cell is captured. The patterns are the literal
    tags, so rows or cells whose opening tag carries attributes
    (e.g. ``<tr class="x">``) are not matched.

    Returns a list with one entry per matched row; each entry is a list of
    the cell contents with surrounding whitespace stripped. Markup inside a
    cell is returned verbatim.
    """
    rows = []
    for tr in re.findall(r'<tr>.*?</tr>', html, re.DOTALL):
        cells = re.findall(r'<td>(.*?)</td>', tr, re.DOTALL)
        # Build a real list rather than map(): on Python 3 map() is lazy and
        # callers index into the row (r[1], r[4], ...).
        rows.append([cell.strip() for cell in cells])
    return rows
def getWebPageContent(url):
    """Fetch *url* and return the response body as returned by ``read()``.

    The URL is opened with ``urllib.urlopen`` and read in full. The handle
    is closed even when the read fails; errors from opening or reading
    propagate to the caller.
    """
    fu = urllib.urlopen(url)
    try:
        return fu.read()
    finally:
        # Always release the connection, including on a failed read.
        fu.close()
# Script body: query malwaredomainlist.com for today's entries and sort the
# listed URLs into per-day output files under d:/URLS/<date>/.
d = datetime.date.today()
# The search parameter carries the date with "/" separators.
url = (r'http://www.malwaredomainlist.com/mdl.php?search='
       + d.isoformat().replace("-", "/")
       + '&colsearch=All&quantity=All')
# Go through the shared helper so the connection is closed after reading.
html = getWebPageContent(url)

name = d.isoformat()
base = r'd:/URLS/' + name
path = base + '/'
for folder in (base, path + 'exe/', path + 'txt/'):
    if not os.path.exists(folder):
        os.makedirs(folder)

# Start from empty output files: drop leftovers from an earlier run today.
# Each file is removed on its own so that a missing url.txt does not leave a
# stale MalwareCallHome.txt behind to be appended to.
for stale in ('url.txt', 'MalwareCallHome.txt'):
    try:
        os.remove(path + stale)
    except OSError:  # WindowsError is a subclass; file simply not there
        pass

rows = html2table(html)
# The first three matched rows are skipped (presumably page/table header
# rows -- confirm against the current page layout).
rows = rows[3:]

f = open(path + 'url.txt', 'a')
ff = open(path + 'MalwareCallHome.txt', 'a')
try:
    for r in rows:
        # r[1], r[2] and r[4] are read below; skip rows that are too short
        # instead of crashing on them.
        if len(r) < 5:
            continue

        # Column 1 holds the entry unless it is "-", in which case column 2
        # is used. "<wbr>" break hints are stripped from the text.
        tt = (r[2] if r[1] == "-" else r[1]).replace("<wbr>", "")

        if r[4] == "Malware calls home":
            # "Malware calls home" entries are only recorded, never fetched.
            ff.write(tt + '\n')
            continue

        if '.txt' not in tt and '.exe' not in tt:
            f.write(tt + '\n')
            continue

        # Entry mentions .txt or .exe: try to fetch it and keep a copy.
        print(tt)
        try:
            content = getWebPageContent(tt)
            # Binary append: the payload is raw bytes, and text mode would
            # rewrite newlines on Windows.
            # NOTE(review): tt usually looks like host/path, so path + tt may
            # point into directories that do not exist, and the exe/ and txt/
            # folders created above are never used -- confirm the intended
            # destination.
            with open(path + tt, 'ab') as fp:
                fp.write(content)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next row.
            print('download failed: %s (%s)' % (tt, exc))
finally:
    f.close()
    ff.close()
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -