With requests or urllib, how can I get a fully loaded page?


I'm getting started with web scraping and I'm trying to download all the images from an imgur post. There are more than 1500, but I only download the first 15, because the page loads images 15 at a time. How can I make requests.get(url).content give me the fully loaded page? Or can I do it with some other library?
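For reference, this is the kind of thing I mean by "some other library": a minimal sketch with Selenium that keeps scrolling so the page loads each new batch of images before grabbing the HTML. The scroll loop and the 2-second wait are my assumptions about how the lazy loading behaves, not code tested against this page:

from selenium import webdriver
import time

driver=webdriver.Firefox()
driver.get("http://m.imgur.com/a/46UVO")

# scroll to the bottom repeatedly so each new batch of images gets loaded
last_height=driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # assumed wait; give the next batch time to load
    new_height=driver.execute_script("return document.body.scrollHeight")
    if new_height==last_height:
        break
    last_height=new_height

html=driver.page_source  # the rendered html, ready for BeautifulSoup
driver.quit()

If something like this is the right direction, html could then be passed to BeautifulSoup in place of the plain download my scraper does below.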

The code of my scraper is as follows:

from bs4 import BeautifulSoup as bs
import requests as r
from os import listdir as ls
from random import randint as rand

# helper functions that will come in handy

abc="abcdefghijklmnopqrstuvwxyz"
abc=abc+abc.upper()+"1234567890_"

def get_ext(url):
    # return the extension (assumes a 3-character extension like .jpg)
    return url[-4:]

def fix_rel(url,prot="http"):
    # turn protocol-relative urls ("//i.imgur.com/...") into absolute ones
    if url[:len(prot)]==prot:
        return url
    else:
        if url[:2]=="//":
            return prot+":"+url
        else:
            # fragile guess: a root-relative url would really need the site host
            return prot+":/"+url

def random_string(leng=5):
    # build a random string of length leng from the abc alphabet
    gen=str()
    for el in range(0,leng):
        gen=gen+abc[rand(0,len(abc)-1)]
    return gen

def add(q):
    # increment a numeric name; otherwise append random characters
    try:
        return str(int(q)+1)
    except ValueError:
        return str(q)+random_string(8)

def setfilename(dire,name="1",ext=".txt"):
    # find a filename that does not already exist in dire
    if name+ext in ls(dire):
        return setfilename(dire,add(name),ext)
    else:
        return name+ext

def down(url,dest,filename="download",ext=".txt"):
    # download url into dest; "wb" because images are binary data
    f=open(dest+setfilename(dest,filename,ext),"wb")
    f.write(r.get(url).content)
    f.close()

# here the web scraping begins

url="http://m.imgur.com/a/46UVO"
#url="http://m.imgur.com/gallery/hW9it"
dest="/sdcard/images/mlpedits/"
print "Descargando html..."
sopa=bs(ul.urlopen(url).read(),"html.parser")
i=0
ok=0
print "Descargando imagenes..."
imagenes=sopa.findAll("img")
c=len(imagenes)
for el in imagenes:
    i+=1
    try:
        down(fix_rel(el["src"]),dest,"a",get_ext(el["src"]))
        print str(i)+"/"+str(c)+" -> "+el["src"]
        ok+=1
    except Exception:
        print str(i)+"/"+str(c)+" x "+el["src"]

print "\n\n",str(ok)+"/"+str(i)+" descargados"
    
asked by Samel Fagel 10.10.2016 at 00:01

0 answers