You and Me ,He and She ,EveryOne Is Angel. EveryOne Has His Angel.

用Python脚本检测友情链接

September 1st, 2008 admin

相信不少站长平常比较注意检查交换过的链接,如果对方把自己的链接删除了,就会做出相应处理。检测友情链接的工具很多,网上一找一大把。用Python实现也很简单,以后定期检测博客的友情链接就方便了。

代码如下,看详细注释

from sgmllib import SGMLParser
import urllib
import httplib
# Base URL of your own site; links starting with this prefix count as "ours".
url = "http://www.angeltt.com"
# Page on your own site that lists the exchanged (friend) links.
linkpage = "http://www.angeltt.com/index.php/links"
# Paths probed on each friend site when looking for a link back to `url`;
# extend this list as needed.
checkpages = ["", "links", "index.php", "index.php/links", "links.php", "link.php"]
#定义一个UrlLister来解析页面并得到所有url

class URLLister(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.urls = []

def start_a(self, attrs):
href = [v for k, v in attrs if k=='href']
if href:
self.urls.extend(href)
# Fetch a page and return all links found on it.
def fetchPage(url):
    """Return the ``href`` values of all ``<a>`` tags on the page at ``url``.

    A HEAD request is sent first so that missing pages are detected without
    downloading them.

    Returns:
        A list of href strings, or ``None`` when the HEAD request answers
        with status 404.

    Network and parsing errors are not handled here; they propagate to the
    caller.
    """
    urlpart = httplib.urlsplit(url)
    # An empty path (e.g. "http://host") is not a valid request target.
    path = urlpart[2] or "/"
    if urlpart[3]:
        # Keep the query string so the HEAD probe hits the same resource.
        path = path + "?" + urlpart[3]
    if urlpart[0] == "https":
        conn = httplib.HTTPSConnection(urlpart[1])
    else:
        conn = httplib.HTTPConnection(urlpart[1])
    try:
        conn.request("HEAD", path)
        status = conn.getresponse().status
    finally:
        # Always release the socket, even when the request fails.
        conn.close()
    if status == 404:
        return None
    page = urllib.urlopen(url)
    try:
        parser = URLLister()
        parser.feed(page.read())
    finally:
        page.close()
    return parser.urls

# Collect the friend-link URLs listed on our own links page.
def GetFriendUrls():
    """Return the external links found on ``linkpage``.

    Returns:
        A list of absolute http(s) URLs that do not start with our own
        ``url``, or ``None`` (after printing a message) when ``linkpage``
        could not be opened.
    """
    urls = fetchPage(linkpage)
    if urls is None:
        # Single parenthesised argument prints identically as a statement.
        print("Failed to Open Mypage")
        return None
    # Relative links, anchors, mailto: etc. point at our own site or are not
    # web pages at all, so only absolute http(s) links can be friend sites.
    return [
        s for s in urls
        if s.lower().startswith(("http://", "https://"))
        and not s.startswith(url)
    ]

# Check every friend site for links back to our own site.
def CheckLinks():
    """Print, for each friend URL, how many probed pages link back to us.

    For every URL returned by ``GetFriendUrls`` each path in ``checkpages``
    is fetched; a page counts when at least one of its links starts with
    ``url``. Pages that are missing or cannot be fetched are skipped.
    """
    urls = GetFriendUrls()
    print(urls)
    if urls is None:
        return
    for it in urls:
        cnt = 0
        # Avoid "//" in the probe URL when the friend URL ends with "/".
        base = it.rstrip("/")
        for page in checkpages:
            try:
                res = fetchPage(base + "/" + page)
            except Exception:
                # Deliberate best effort: an unreachable or unparsable page
                # simply does not count towards the total.
                continue
            if res is None:
                # The probed page does not exist (404).
                continue
            if any(s.startswith(url) for s in res):
                cnt = cnt + 1
        print("your link count in %s is %d" % (it, cnt))
# Script entry point.

if __name__ == "__main__":
    CheckLinks()