Uses libxml2dom: http://www.boddie.org.uk/python/libxml2dom.html
Basically, you supply it the URL of the gallery, and
the complete URLs of all the thumbs will be returned as a list.
def get_thumbs(url):
    """Return the absolute URLs of the thumbnail images on a gallery page.

    The page at ``url`` is parsed with ``libxml2dom``.  An anchor counts as a
    thumbnail link when the path of its ``href`` ends in one of the known
    media extensions (compared case-insensitively) and the anchor contains at
    least one ``<img>`` element.  For each such anchor the ``src`` of its
    first image is resolved against ``url`` and collected.

    Relies on ``libxml2dom``, ``urlparse`` and ``os`` being available as
    module-level names.

    Args:
        url: Address of the gallery page to scan.

    Returns:
        A non-empty list of thumbnail URLs, in the order the matching
        anchors were returned by the parser.

    Raises:
        ValueError: If the page yielded no thumbnails.
    """
    thumb_exts = (".jpg", ".gif", ".avi", ".mpg", ".wmv")

    # NOTE(review): the second argument (1) presumably selects HTML parsing;
    # confirm against the libxml2dom documentation.
    tree = libxml2dom.parseURI(url, 1)

    result = []
    for anchor in tree.getElementsByTagName("a"):
        href = anchor.getAttribute("href")
        if not href:
            # Anchors without an href (e.g. named anchors) cannot link to
            # media; skipping them also keeps a missing value away from
            # urlsplit.
            continue

        # urlsplit yields (scheme, netloc, path, query, fragment); only the
        # path matters, so a query string or fragment cannot hide the
        # extension.
        _scheme, _netloc, path, _query, _fragment = urlparse.urlsplit(href)
        _root, ext = os.path.splitext(path.lower())
        if ext not in thumb_exts:
            continue

        imgs = anchor.getElementsByTagName("img")
        if imgs:
            # The first image inside the link is taken to be the thumbnail.
            img_src = imgs[0].getAttribute("src")
            result.append(urlparse.urljoin(url, img_src))

    if not result:
        # Keep echoing the offending URL to stdout as the original did.
        print(url)
        # String exceptions are rejected by Python 2.6+, so raise a real
        # exception object instead of the bare string "Error".
        raise ValueError("Error: no thumbnails found at %s" % url)
    return result