Browse Source

Fix relative path updating on 301/302.

master
Alec Murphy 5 years ago
parent
commit
00a26bf264
  1. 40
      uriel.py

40
uriel.py

@@ -70,7 +70,9 @@ def UrielGetPage():
if url_comp.query != '':
post_scheme += '?'+url_comp.query
url = scheme + "://" + post_scheme
pagedata = subprocess.Popen('wget -q -O - -U "' + Uriel.user_agent + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
pagereq = subprocess.Popen('wget -O - -U "' + Uriel.user_agent + '" "' + url + '"', shell=True, stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
pagedata = pagereq[0]
pagehdrs = pagereq[1].split('\n')
filedata = UrielPreProcess(pagedata, url)
filesize = len(filedata)
if filesize>0:
@@ -97,6 +99,42 @@ def UrielGetPage():
os.write(HGBD,Uriel.download_buffer)
logger.info("[Uriel] copy to download buffer " + url)
else:
for p_hdr in pagehdrs:
if p_hdr.lower().find('location: ') != -1:
if p_hdr.lower().find('[following]') != -1:
url_comp = urlparse.urlparse(p_hdr[p_hdr.lower().find('location: ')+10:p_hdr.lower().find('[following]')].strip())
scheme = ''
netloc = ''
path = ''
if url_comp.scheme == '':
scheme = Uriel.rel.scheme
else:
scheme = url_comp.scheme
Uriel.rel.scheme = url_comp.scheme
if url_comp.netloc == '':
netloc = Uriel.rel.netloc
else:
netloc = url_comp.netloc
Uriel.rel.netloc = url_comp.netloc
if url_comp.path != '':
if url_comp.path.find('/') != -1:
if url_comp.scheme == '' or url_comp.netloc == '':
if url_comp.path[:1] != '/':
path = Uriel.rel.path + url_comp.path
Uriel.rel.path += url_comp.path[:url_comp.path.rfind('/')+1]
else:
path = url_comp.path
Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1]
else:
path = url_comp.path
Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1]
else:
path = Uriel.rel.path + url_comp.path
post_scheme = netloc + "/" + urllib.quote(path)
post_scheme = post_scheme.replace('//','/')
if url_comp.query != '':
post_scheme += '?'+url_comp.query
url = scheme + "://" + post_scheme
Uriel.nav_index += 1
Uriel.history = Uriel.history[0:Uriel.nav_index]
Uriel.history.append({'url':url, 'filedata':filedata})

Loading…
Cancel
Save