|
|
|
@ -70,7 +70,9 @@ def UrielGetPage():
|
|
|
|
|
if url_comp.query != '': |
|
|
|
|
post_scheme += '?'+url_comp.query |
|
|
|
|
url = scheme + "://" + post_scheme |
|
|
|
|
pagedata = subprocess.Popen('wget -q -O - -U "' + Uriel.user_agent + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] |
|
|
|
|
pagereq = subprocess.Popen('wget -O - -U "' + Uriel.user_agent + '" "' + url + '"', shell=True, stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate() |
|
|
|
|
pagedata = pagereq[0] |
|
|
|
|
pagehdrs = pagereq[1].split('\n') |
|
|
|
|
filedata = UrielPreProcess(pagedata, url) |
|
|
|
|
filesize = len(filedata) |
|
|
|
|
if filesize>0: |
|
|
|
@ -97,6 +99,42 @@ def UrielGetPage():
|
|
|
|
|
os.write(HGBD,Uriel.download_buffer) |
|
|
|
|
logger.info("[Uriel] copy to download buffer " + url) |
|
|
|
|
else: |
|
|
|
|
for p_hdr in pagehdrs: |
|
|
|
|
if p_hdr.lower().find('location: ') != -1: |
|
|
|
|
if p_hdr.lower().find('[following]') != -1: |
|
|
|
|
url_comp = urlparse.urlparse(p_hdr[p_hdr.lower().find('location: ')+10:p_hdr.lower().find('[following]')].strip()) |
|
|
|
|
scheme = '' |
|
|
|
|
netloc = '' |
|
|
|
|
path = '' |
|
|
|
|
if url_comp.scheme == '': |
|
|
|
|
scheme = Uriel.rel.scheme |
|
|
|
|
else: |
|
|
|
|
scheme = url_comp.scheme |
|
|
|
|
Uriel.rel.scheme = url_comp.scheme |
|
|
|
|
if url_comp.netloc == '': |
|
|
|
|
netloc = Uriel.rel.netloc |
|
|
|
|
else: |
|
|
|
|
netloc = url_comp.netloc |
|
|
|
|
Uriel.rel.netloc = url_comp.netloc |
|
|
|
|
if url_comp.path != '': |
|
|
|
|
if url_comp.path.find('/') != -1: |
|
|
|
|
if url_comp.scheme == '' or url_comp.netloc == '': |
|
|
|
|
if url_comp.path[:1] != '/': |
|
|
|
|
path = Uriel.rel.path + url_comp.path |
|
|
|
|
Uriel.rel.path += url_comp.path[:url_comp.path.rfind('/')+1] |
|
|
|
|
else: |
|
|
|
|
path = url_comp.path |
|
|
|
|
Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1] |
|
|
|
|
else: |
|
|
|
|
path = url_comp.path |
|
|
|
|
Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1] |
|
|
|
|
else: |
|
|
|
|
path = Uriel.rel.path + url_comp.path |
|
|
|
|
post_scheme = netloc + "/" + urllib.quote(path) |
|
|
|
|
post_scheme = post_scheme.replace('//','/') |
|
|
|
|
if url_comp.query != '': |
|
|
|
|
post_scheme += '?'+url_comp.query |
|
|
|
|
url = scheme + "://" + post_scheme |
|
|
|
|
Uriel.nav_index += 1 |
|
|
|
|
Uriel.history = Uriel.history[0:Uriel.nav_index] |
|
|
|
|
Uriel.history.append({'url':url, 'filedata':filedata}) |
|
|
|
|