# ancor_extracor.py
import urllib
from sgmllib import SGMLParser
class URLLister(SGMLParser):
    """Collect the ``href`` value of every ``<a>`` tag fed to the parser.

    The collected targets are stored in ``self.urls`` in the order in which
    they were seen.
    """

    def reset(self):
        """Reset the base parser and start over with an empty ``urls`` list.

        Per the sgmllib documentation, ``reset()`` is also called implicitly
        when the parser is instantiated, so ``urls`` always exists.
        """
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record the ``href`` values of one ``<a>`` start tag.

        sgmllib dispatches start tags to ``start_<tag>`` methods by name.

        attrs -- list of ``(name, value)`` pairs for the tag's attributes.
        """
        # Tags without an href (e.g. named anchors) contribute nothing.
        self.urls.extend([v for k, v in attrs if k == 'href'])
if __name__ == "__main__":
    # Fetch the page, collect every anchor target, and print one per line.
    # NOTE(review): the address has no scheme or host, so urllib.urlopen()
    # treats it as a local file path -- presumably the site prefix was lost
    # when the listing was copied; confirm the intended absolute URL.
    page = urllib.urlopen("/lehre/ss08/web-mining/uebungen.html")
    try:
        html = page.read()
    finally:
        # Release the handle even if reading fails.
        page.close()

    extractor = URLLister()
    extractor.feed(html)
    # close() forces processing of any input the parser is still buffering.
    extractor.close()

    for u in extractor.urls:
        # With a single argument the parenthesised form prints exactly what
        # the Python 2 statement ``print u`` does.
        print(u)
+49 6151 16-21811
+49 6151 16-

Drucken
Impressum
Sitemap
Suche
