代码如下:
import sys, urllib 
import datetime,time 
def getDate():
    """Return today's local date as a ``YYYY-MM-DD`` string.

    The original built this by taking ``str(datetime.datetime.now())`` and
    keeping the part before the first space; ``date.today().isoformat()``
    yields the same text without the string round-trip.
    """
    return datetime.date.today().isoformat()
# Example URL: "http://www.kingnic.com/list/2009-06-16.txt"
def getUrl(dateStr=None):
    """Build the URL of the kingnic.com domain list for one day.

    Args:
        dateStr: Date used as the file name, e.g. ``"2009-06-16"``. When it
            is omitted or empty, the value returned by ``getDate()`` is used.

    Returns:
        The list URL (``<base>/<date>.txt``), or ``None`` when no date could
        be determined (in which case ``"Error Date!"`` is printed).
    """
    baseUrl = "http://www.kingnic.com/list/"
    # An explicit date wins; getDate() is only consulted as a fallback.
    day = dateStr or getDate()
    if not day:
        print("Error Date!")
        return None
    return baseUrl + day + ".txt"
def getSource(url):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to open; anything accepted by ``urlopen``.

    Returns:
        The body as ``str``. On Python 3 the raw bytes are decoded as UTF-8
        (undecodable bytes are replaced) so the result can be handed to a
        text-mode writer; on Python 2 the ``str`` from ``read()`` is
        returned unchanged.

    Raises:
        Whatever ``urlopen`` raises when the URL cannot be opened.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        body = response.read()
    finally:
        # The original never closed the response.
        response.close()

    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body
def save(source, filename="domains.txt"):
    """Write ``source`` to ``filename``, replacing any existing content.

    Args:
        source: Text to write.
        filename: Destination path; defaults to ``"domains.txt"``.

    Returns:
        ``True`` once the data has been written.
    """
    # ``with`` closes the file even if write() raises.
    with open(filename, "w") as fp:
        fp.write(source)
    return True
def loadList(fileName="domains.txt"):
    """Read ``fileName`` and return its lines.

    The original ignored ``fileName`` and always opened ``"domains.txt"``;
    the argument is now honoured (the default keeps the old behaviour).

    Args:
        fileName: Path of the file to read; defaults to ``"domains.txt"``.

    Returns:
        A list with one string per line, line endings included
        (the result of ``readlines()``).
    """
    with open(fileName, "r") as fp:
        return fp.readlines()
def getPrefix(domain):
    """Return the part of ``domain`` before its first ``'.'``.

    A string without any dot is returned unchanged.
    """
    # Only the first label is needed, so stop splitting after one dot.
    return domain.split('.', 1)[0]
def getPostfix(domain):
    """Return the second dot-separated label of ``domain``.

    For ``"abc.com"`` this is ``"com"``. As in the original, only the label
    directly after the first dot is returned (``"a.co.uk"`` gives ``"co"``),
    and no whitespace is stripped.

    Returns:
        The second label, or ``''`` when ``domain`` contains no dot (the
        original raised ``IndexError`` in that case).
    """
    labels = domain.split('.')
    if len(labels) < 2:
        return ''
    return labels[1]
def hasMidLine(domain):
    """Return ``True`` when ``domain`` contains a hyphen (``'-'``)."""
    # The membership test already yields a bool; no if/else needed.
    return '-' in domain
def parser(domains, minLen=0, maxLen=4):
    """Filter a list of domain lines and print a short summary.

    A line is kept when its prefix (the text before the first dot, as
    returned by ``getPrefix``) has a length within ``[minLen, maxLen]`` and
    contains no hyphen. Blank lines are skipped and never kept; the original
    raised ``IndexError`` on them through an unused ``getPostfix`` call.

    Args:
        domains: Sequence of domain strings, e.g. the lines from
            ``loadList()``. Kept entries are returned unmodified.
        minLen: Smallest accepted prefix length (default 0).
        maxLen: Largest accepted prefix length (default 4).

    Returns:
        The list of entries that passed both filters, in input order.
    """
    result = []
    lenNum = 0       # rejected: prefix length out of range
    midLineNum = 0   # rejected: prefix contains '-'

    for domain in domains:
        if not domain.strip():
            continue
        prefix = getPrefix(domain)
        if not minLen <= len(prefix) <= maxLen:
            lenNum += 1
            continue
        if hasMidLine(prefix):
            midLineNum += 1
            continue
        result.append(domain)

    # Single-argument print() with %-formatting emits the same bytes the
    # Python 2 print statements did, and also runs on Python 3.
    print(" log : \n")
    print("all: \t%s" % len(domains))
    # NOTE: bounds are printed as (max, min), matching the sample log
    # shown after the script ("len not in [4,0]").
    print("len not in [%s,%s] \t: %s" % (maxLen, minLen, lenNum))
    print("contain '-' :\t%s" % midLineNum)
    print("remain:\t%s" % len(result))
    return result
if __name__ == "__main__":
    # Download today's list, keep a raw copy in domains.txt, then write the
    # filtered entries to result.txt.
    url = getUrl()
    if url is None:
        # getUrl() has already printed the reason; there is nothing to fetch.
        sys.exit(1)
    source = getSource(url)
    save(source)
    domains = loadList()
    result = parser(domains)
    # Entries come from readlines() and still end with their newline, so an
    # empty separator keeps one domain per line.
    save("".join(result), "result.txt")
    print("\n\n\nfinished!!")
输出文件: 
domains.txt : 从 kingnic.com 下载的当天释放的域名;
result.txt    : 符合过滤条件的域名; 
log 输出如下:
all: 55500 
len not in [4,0] : 55019 
contain '-' : 32
remain: 449 
finished!! 
对后缀、长度和有无“-”进行过滤,过滤条件有点少,其它条件以后如有需要再加。