[python-chinese] a wrapper for BeautifulSoup

cry zyqmail在tom.com
星期五 九月 1 17:44:56 HKT 2006


python,您好!

作了一个小玩意,BeautifulSoup的wrapper。呵呵,学习加玩乐。如果谁知道有人已经做过了,请告诉我。谢谢。

可以让你按照层次取得想要的东西。
比如:
soup = mysoup.MySoup(doc)
# soup里只包含了<body>数据(其它的从BeautifulSoup可以轻易得到)。
# soup里有一个Body成员,一切都从这里开始。(里面所有的成员都以大写字母开头,且只包含Tag)

text1 = soup.Body.Table[1].Tr[1].Td[2].content.string
text2 = soup.Body.Table[1].Tr[1].Td[3].content.contents[0].string
#其中,content是BeautifulSoup的Tag.

mysoup.py : 这个小玩意
mysoup_test.py : 一个应用的例子,取在线字典的数据,使用:python mysoup_test.py 要查的词

	root$python mysoup_test.py python
	python:
	n. 丹舌,大蟒,巨蟒

mysoup.py ---------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
MySoup : a wrapper of BeautifulSoup

Author : Robin Zhang
Date : 2006/9/1
Example:
    soup = MySoup(''.join(s))
    text1 = soup.Body.Table[1].Tr[1].Td[0].Table[0].Tr[1].Td[0].Table[0].Tr[0].Td[0].Big[0].B[0].Font[0].content.string
    text2 = soup.Body.Table[1].Tr[1].Td[0].Table[0].Tr[1].Td[0].Table[0].Tr[0].Td[0].Big[1].Font[0].content.contents[0].string
    text3 = soup.Body.Table[1].Tr[1].Td[0].Table[0].Tr[1].Td[0].Table[0].Tr[0].Td[0].Big[1].Font[0].content.contents[2].string

"""

from BeautifulSoup import BeautifulSoup

class LayerTag:
    """One node of the attribute-addressable tag tree.

    The wrapped object is kept in ``content``.  Child nodes are stored in
    list attributes named after the capitalized child tag name (for
    example ``Table``, ``Tr``, ``Td``), in the order they were added, so a
    tree can be walked as ``node.Table[1].Tr[0]``.
    """
    def __init__(self, Tag):
        """Wrap *Tag*, storing it unchanged as ``self.content``."""
        self.content = Tag

    def AddSubTag(self, SubTagName, SubLayerTag):
        """Append *SubLayerTag* to the child list for *SubTagName*.

        The list is held in the attribute ``SubTagName.capitalize()`` and
        is created the first time a child with that name is added, so
        names differing only in case share one list.
        """
        attr_name = SubTagName.capitalize()
        children = getattr(self, attr_name, None)
        # Identity test: "no such attribute yet" is signalled by the None
        # default above, so compare with `is`, not `==`.
        if children is None:
            children = []
            setattr(self, attr_name, children)
        children.append(SubLayerTag)
                
class MySoup(BeautifulSoup):
    """BeautifulSoup subclass that also exposes the body as a LayerTag tree.

    After construction ``self.Body`` holds the LayerTag built from
    ``self.html.body``; its descendants are reachable through capitalized
    list attributes, e.g. ``soup.Body.Table[1].Tr[0].Td[2].content``.
    """
    def __init__(self, *args, **kwargs):
        """Parse the document, then build ``self.Body``.

        All positional and keyword arguments are forwarded unchanged to
        ``BeautifulSoup.__init__``.
        NOTE(review): presumably fails with AttributeError when the
        document has no <html>/<body> element - confirm against callers.
        """
        BeautifulSoup.__init__(self, *args, **kwargs)
        body_tag = self.html.body
        self.Body = self._create_tag_layer(body_tag)

    def _create_tag_layer(self, Tag):
        """Recursively wrap *Tag* and its tag children in LayerTag nodes.

        Only entries of ``Tag.contents`` whose class is named "Tag" are
        wrapped; every other entry is skipped.  Returns the LayerTag
        wrapping *Tag*.
        """
        node = LayerTag(Tag)
        child_tags = [child for child in Tag.contents
                      if child.__class__.__name__ == "Tag"]
        for child in child_tags:
            node.AddSubTag(child.name, self._create_tag_layer(child))
        return node

mysoup_test.py ---------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
MySoup_test : test MySoup

Author : Robin Zhang
Date : 2006/9/1
"""

import mysoup
import urllib2,sys

if __name__ == '__main__':
    """
    This example visits an on line dictionary site to consult a word
    """
    url = "http://sh.dict.cn/search/?q="

    if len(sys.argv) < 2:
        print "Usage : mysoup new_word"
        sys.exit()

    try:
        page = urllib2.urlopen("%s%s" % (url, sys.argv[1]))
        soup = mysoup.MySoup(page).Body.Table[1].Tr[1].Td[0].Table[0].Tr[1].Td[0].Table[0].Tr[0].Td[0]
        print soup.Big[0].B[0].Font[0].content.string.strip("\r\n")
        for item in soup.Big[1].Font[0].content.contents:
            if item.__class__.__name__!="Tag":
                print item.string.strip("\r\n")
    except Exception, msg:
        print "Error: %s" % msg
---------------------------------------------------------------------------------

                    致
礼!

            cry
            zyqmail在tom.com




关于邮件列表 python-chinese 的更多信息