import bs4
import re

# NOTE(review): this file reached review with its line breaks and indentation
# collapsed. The nesting below was reconstructed from statement order; confirm
# it against a pristine copy before merging.
#
# Names such as os, subprocess, uuid, urlparse, urllib, logger, conn, HGBD,
# BLK_SIZE and ZeroParamBuf are used but not defined in the visible part of
# this file; they are expected to be in scope already.

# Command bytes dispatched by uriel(). Each handler sends its own command byte
# back over `conn` once it has written its reply.
URIEL_GETPAGE = 0x10
URIEL_NAVBACK = 0x11
URIEL_NAVFWD = 0x12
URIEL_STR_SIZE = 144  # maximum number of URL characters written at offset 128
URIEL_THUMB = 0x13
URIEL_DOWNLOAD = 0x14


class Uriel:
    # Shared browser state, accessed through the class itself.
    download_buffer = ''  # raw data of the last page flagged as binary
    user_agent = ''       # filled from the parameter block on first use
    history = []          # entries are {'url': ..., 'filedata': ...}
    nav_index = -1        # position in `history`; -1 while it is empty

    class rel:
        # Last remembered URL parts, used to complete partial URLs.
        scheme = ''
        netloc = ''
        path = ''


def uriel(data):
    """Run the handler registered for command byte *data*, if there is one."""
    # Built per call because the handlers are defined further down the module.
    handlers = {
        URIEL_GETPAGE: UrielGetPage,
        URIEL_NAVBACK: UrielNavBack,
        URIEL_NAVFWD: UrielNavFwd,
        URIEL_THUMB: UrielThumb,
        URIEL_DOWNLOAD: UrielDownload,
    }
    handler = handlers.get(data)
    if handler is not None:
        handler()


def _uriel_cstr(buf):
    """Return the part of *buf* before its first NUL (all of it if none).

    The previous ``buf[:buf.find('\\x00')]`` dropped the last character when
    no NUL was present, because ``find`` returns -1 in that case.
    """
    return buf.split('\x00', 1)[0]


def _uriel_read_request():
    """Read the request from HGBD and return the parsed URL.

    The first BLK_SIZE bytes are the parameter block and the following
    4 * BLK_SIZE bytes hold the URL. The user agent is taken from the
    parameter block only while ``Uriel.user_agent`` is still empty.
    """
    os.lseek(HGBD, 0, os.SEEK_SET)
    param_buf = os.read(HGBD, BLK_SIZE)
    os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
    url_buf = os.read(HGBD, BLK_SIZE * 4)
    if Uriel.user_agent == '':
        Uriel.user_agent = _uriel_cstr(param_buf)
    return urlparse.urlparse(_uriel_cstr(url_buf))


def _uriel_resolve(url_comp, remember_origin=False):
    """Build an absolute URL string from *url_comp* and ``Uriel.rel``.

    Missing scheme/netloc are taken from ``Uriel.rel``. When
    *remember_origin* is true, a scheme/netloc present in *url_comp* is
    stored back into ``Uriel.rel``. The directory part of the path is stored
    in ``Uriel.rel.path`` on every call, regardless of *remember_origin*.

    Only scheme, netloc and path are used; params, query and fragment of
    *url_comp* are not carried into the result.

    NOTE(review): because ``Uriel.rel.path`` is updated on every call,
    thumbnail and history requests also move the base directory. That is
    what the original handlers did; confirm it is intended.
    """
    if url_comp.scheme == '':
        scheme = Uriel.rel.scheme
    else:
        scheme = url_comp.scheme
        if remember_origin:
            Uriel.rel.scheme = scheme

    if url_comp.netloc == '':
        netloc = Uriel.rel.netloc
    else:
        netloc = url_comp.netloc
        if remember_origin:
            Uriel.rel.netloc = netloc

    path = ''
    if url_comp.path != '':
        if '/' not in url_comp.path:
            # Bare file name: relative to the remembered directory.
            path = Uriel.rel.path + url_comp.path
        else:
            directory = url_comp.path[:url_comp.path.rfind('/') + 1]
            partial = url_comp.scheme == '' or url_comp.netloc == ''
            if partial and url_comp.path[:1] != '/':
                # Relative path containing sub-directories.
                path = Uriel.rel.path + url_comp.path
                Uriel.rel.path += directory
            else:
                path = url_comp.path
                Uriel.rel.path = directory

    post_scheme = netloc + "/" + urllib.quote(path)
    post_scheme = post_scheme.replace('//', '/')
    return scheme + "://" + post_scheme


def _uriel_wget_args(url):
    """Return the wget argv that streams *url* to stdout."""
    return ['wget', '-q', '-O', '-', '-U', Uriel.user_agent, url]


def _uriel_fetch(url):
    """Return the body of *url* as fetched by wget ('' if wget cannot run).

    The command is passed as an argument list rather than a shell string so
    that a guest-supplied URL or user agent cannot inject shell syntax.
    """
    try:
        with open(os.devnull, 'wb') as devnull:
            proc = subprocess.Popen(_uriel_wget_args(url),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=devnull)
            return proc.communicate()[0]
    except OSError:
        return ''


def _uriel_render_thumb(url, out_path):
    """Pipe wget's output for *url* through ``gm convert`` into *out_path*."""
    with open(os.devnull, 'wb') as devnull:
        wget = subprocess.Popen(_uriel_wget_args(url),
                                stdout=subprocess.PIPE, stderr=devnull)
        try:
            gm = subprocess.Popen(
                ['gm', 'convert', '-resize', '100x100', '-',
                 '-colors', '16', out_path],
                stdin=wget.stdout, stdout=subprocess.PIPE)
            gm.communicate()
        finally:
            # Drop our copy of the pipe so wget cannot block on it forever.
            wget.stdout.close()
            wget.wait()


def _uriel_unpack_download(pagedata, url):
    """Return the data to keep in the download buffer for *url*.

    URLs ending in ``.Z`` are written to a temp file, run through ``tosz``,
    and the resulting file (same name without ``.Z``) is returned and then
    removed. Any I/O failure on that path yields ''. Other URLs return
    *pagedata* unchanged.
    """
    if url[-2:].upper() != ".Z":
        return pagedata

    tmp_z_file = "/tmp/" + str(uuid.uuid4()).split('-')[0].upper() + ".Z"
    while os.path.exists(tmp_z_file):
        tmp_z_file = "/tmp/" + str(uuid.uuid4()).split('-')[0].upper() + ".Z"
    with open(tmp_z_file, "wb") as packed:
        packed.write(pagedata)

    unpacked_name = tmp_z_file[:-2]
    try:
        subprocess.call(['tosz', tmp_z_file])
        with open(unpacked_name, "rb") as unpacked:
            data = unpacked.read()
        os.remove(unpacked_name)
        return data
    except (IOError, OSError):
        return ''


def _uriel_reply(filedata, url=None):
    """Write a reply into HGBD and return True if *filedata* was non-empty.

    Non-empty: the parameter block is zeroed, the size is written at offset
    0, *url* (when given, cut to URIEL_STR_SIZE) at offset 128, and the data
    at offset BLK_SIZE. Empty: the parameter block is zeroed and "-1" is
    written at offset 0.
    """
    filesize = len(filedata)
    ZeroParamBuf()
    os.lseek(HGBD, 0, os.SEEK_SET)
    if filesize > 0:
        os.write(HGBD, str(filesize))
        if url is not None:
            os.lseek(HGBD, 128, os.SEEK_SET)
            os.write(HGBD, str(url)[:URIEL_STR_SIZE])
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, filedata)
        return True
    os.write(HGBD, str(-1))
    return False


def _uriel_reply_from_history(direction):
    """Reply with the history entry at ``Uriel.nav_index``.

    *direction* is the word used in the log line ("back" or "fwd"). An empty
    history is reported as an error instead of raising IndexError.
    """
    if not Uriel.history:
        _uriel_reply('')
        logger.error("[Uriel] error reading history for url ")
        return

    entry = Uriel.history[Uriel.nav_index]
    url = _uriel_resolve(urlparse.urlparse(entry['url']))
    if _uriel_reply(entry['filedata'], url):
        logger.info("[Uriel] history navigate " + direction + " to " + url)
    else:
        logger.error("[Uriel] error reading history for url " + url)


def UrielGetPage():
    """Fetch the requested URL, pre-process it, record it and reply."""
    url = _uriel_resolve(_uriel_read_request(), remember_origin=True)
    pagedata = _uriel_fetch(url)
    filedata = UrielPreProcess(pagedata, url)

    if len(filedata) > 0:
        # Pages carrying the binary marker keep their raw data for a later
        # URIEL_DOWNLOAD request.
        if '$AN,"",A="BINARY"$' in filedata:
            Uriel.download_buffer = _uriel_unpack_download(pagedata, url)
        # Discard any forward history, then append the new page.
        Uriel.nav_index += 1
        Uriel.history = Uriel.history[0:Uriel.nav_index]
        Uriel.history.append({'url': url, 'filedata': filedata})

    if _uriel_reply(filedata, url):
        logger.info("[Uriel] navigate to " + url)
    else:
        logger.error("[Uriel] error reading url " + url)
    conn.send(chr(URIEL_GETPAGE))


def UrielNavBack():
    """Step one entry back in history (if possible) and reply with it."""
    if Uriel.nav_index > 0:
        Uriel.nav_index -= 1
    _uriel_reply_from_history("back")
    conn.send(chr(URIEL_NAVBACK))


def UrielNavFwd():
    """Step one entry forward in history (if possible) and reply with it."""
    if Uriel.nav_index < len(Uriel.history) - 1:
        Uriel.nav_index += 1
    _uriel_reply_from_history("fwd")
    conn.send(chr(URIEL_NAVFWD))


def UrielThumb():
    """Reply with a 100x100, 16-colour BMP thumbnail of the requested URL."""
    url = _uriel_resolve(_uriel_read_request())

    tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp'
    while os.path.exists(tmp_thumb):
        tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp'

    try:
        _uriel_render_thumb(url, tmp_thumb)
        with open(tmp_thumb, "rb") as thumb:
            filedata = thumb.read()
    except (IOError, OSError):
        # No thumbnail was produced; report -1 instead of raising so the
        # acknowledgement below is still sent.
        filedata = ''

    try:
        os.remove(tmp_thumb)
    except OSError:
        pass

    if _uriel_reply(filedata):
        logger.info("[Uriel] get image thumbnail " + url)
    else:
        logger.error("[Uriel] error reading url " + url)
    conn.send(chr(URIEL_THUMB))


def UrielDownload():
    """Reply with the raw data kept from the last binary page."""
    if _uriel_reply(Uriel.download_buffer):
        logger.info("[Uriel] binary, download file")
    else:
        logger.error("[Uriel] error downloading file")
    conn.send(chr(URIEL_DOWNLOAD))


# NOTE(review): UrielPreProcess begins here. Its text is corrupted (the markup
# inside its string literals is missing) and it continues past the part of the
# file under review, so it has NOT been edited. The fragment that shared a
# line with the code above is kept verbatim in the next comment line; restore
# the function from a pristine copy.
# def UrielPreProcess(htm1, l_url): title_text = '' hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="U_CloseBrowser;"$ $MA+LIS,"[Back]",LM="U_HistNav(0);"$ $MA+LIS,"[Fwd]",LM="U_HistNav(1);"$ $MA+LIS,"[Go]",LM="U_Browser(GetStr(\\"\nURL> \\"));"$ ' + title_text + '\n\n' if htm1.upper().find('',' ') htm1 = htm1.replace('
',' ') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('
','\n') htm1 = htm1.replace('',' * ') htm1 = htm1.replace(' ',' * ') htm1 = htm1.replace('','') htm1 = htm1.replace('','') title_text = '' a_pos = htm1.upper().find(' ') if a_pos != -1: title_text = htm1[a_pos:htm1.find('', a_pos)].split('>')[1] soup1 = bs4.BeautifulSoup(htm1, 'lxml') unwrap_tags = [ 'html', 'body', 'p', 'b', 'pre', 'span', 'table', 'header' ] for tag in unwrap_tags: for match in soup1.findAll(tag): match.unwrap() for f in soup1.findAll('a'): for tag in f.findAll(True): if str(tag).find(' ') == -1: if tag.name.upper() != 'IMG': tag.decompose() remove_tags = [ 'svg', 'embed', 'head', 'noscript', 'object', 'param', 'script', 'option' ] for tag in remove_tags: [s.extract() for s in soup1(tag)] html = str(soup1) html = html.replace(' ','$PURPLE$') html = html.replace('
','$BLACK$') html = html.replace('','$UL,1$') html = html.replace('','$UL,1$') html = html.replace('','$UL,0$') html = html.replace('','$UL,0$') html = html.replace('','$IV,1$') html = html.replace('','$IV,1$') html = html.replace('','$IV,0$') html = html.replace('','$IV,0$') a_pos = html.upper().find('','$PURPLE$') html = html.replace('
','$BLACK$') html = html.replace('')[0] img_text.replace("'",'"') img_src = '' img_pos = img_text.upper().find('SRC') if img_pos > 0: img_src = img_text[img_text.upper().find('SRC'):].split('"')[1] img_el = '[URIEL_IMG]' + img_src + '[/URIEL_IMG]' html = html[:a_pos] + img_el + html[1+html.upper().find('>', a_pos):] a_pos = html.upper().find('
')[1] button_text = button_text[:button_text.upper().find('', a_pos):] a_pos = html.upper().find('