From 06a0e51ca086d3ee76c7506d4e804e646e816f69 Mon Sep 17 00:00:00 2001 From: Alec Murphy Date: Sat, 18 Feb 2017 22:18:59 -0500 Subject: [PATCH] Convert to HGBD module, updated README. --- README.md | 33 ++-- Uriel.HC | 308 +++++++++++++----------------- uriel.py | 432 ++++++++++++++++++++++++++++++++++++++++++ uriel_preprocessor.py | 206 -------------------- uriel_proxy | 176 ----------------- 5 files changed, 579 insertions(+), 576 deletions(-) create mode 100644 uriel.py delete mode 100755 uriel_preprocessor.py delete mode 100755 uriel_proxy diff --git a/README.md b/README.md index 025a766..13d436d 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,28 @@ # uriel -Uriel Web Browser & IRC Client for TempleOS +Uriel Web Browser & (not-yet) IRC Client for TempleOS -This is a proof-of-concept web browser & (soon) IRC client for TempleOS. +This is a proof-of-concept web browser & (soon) IRC client for TempleOS. It is still very early stages. -Start the proxy on the Host: - +Add the following to your `/etc/hgbdd.conf`: ``` - ./uriel_proxy + modules: { + "uriel":"/path/to/uriel.py" + } ``` -and bind a chardev in QEMU (or serial2 in VMware/VirtualBox) to TCP 127.0.0.1:7202 - +In your `HomeKeyPlugIns.HC` or other startup script: ``` - #include "Uriel" + #include "HGBD"; + #include "Uriel"; ``` You can launch a Browser in the current Task with: ``` - Browser(url); -``` - -Get files with: -``` - Get(url or host_path, local_path); -``` - -or Send files with: -``` - Send(local_path, host_path); + U_Browser(url); ``` # Prerequisites -- wget +- [HGBD](https://github.com/tramplersheikhs/hgbd) - Beautiful Soup 4 (for DolDoc preprocessing) -- GraphicsMagick (for inline images) -- TOSZ (to download .Z files) diff --git a/Uriel.HC b/Uriel.HC index 873678d..f73779f 100755 --- a/Uriel.HC +++ b/Uriel.HC @@ -1,186 +1,150 @@ -#define URIEL_BINARY_EOF "*[U_EOF]" -#define URIEL_BINARY_SOF "*[U_SOF]" -#define URIEL_FCMD_GET "*[U_GET]" -#define URIEL_FCMD_PUT "*[U_PUT]" -#define URIEL_TMP_IMG "/Tmp/uriel_img.bmp" -#define URIEL_PORT 2 -#define URIEL_RAW_PORT 0x02F8 -#define URIEL_TIMEOUT 1 -#include "::/Doc/Comm" +#define URIEL_GETPAGE 0x10 +#define URIEL_NAVBACK 0x11 +#define URIEL_NAVFWD 0x12 +#define URIEL_THUMB 0x13 +#define URIEL_DOWNLOAD 0x14 -U8 UrielPageBuf[2097152]; -U8 UrielFileBuf[10485760]; -I64 UrielFileBufSize=0; -I64 UrielPageBufSize=0; -I64 UrielGetFileAborted=0; -I64 UrielGetFileSuccess=0; -CComm *c=CommInit8n1(URIEL_PORT,115200); +#define URIEL_DL_PATH "::/Home/Downloads/" +#define URIEL_THUMB_BMP "/Tmp/UrielThumb.bmp" +#define URIEL_VERSION "Uriel/0.2" -U0 Get(U8 *remote_file, U8 *local_file="file") { - UrielGetFileAborted=0; - UrielGetFileSuccess=0; - Bool load=FALSE; - I64 b=0; - I64 PrevFileBufSize=0; - I64 StartTimer=0; - I64 TickTimer=0; - UrielFileBufSize=0; - CommPutS(URIEL_PORT, URIEL_FCMD_GET); - CommPutS(URIEL_PORT, remote_file); - CommPutS(URIEL_PORT, "|"); - CommPutS(URIEL_PORT, Define("DD_OS_NAME_VERSION")); - CommPutS(URIEL_PORT, "^"); - progress1_max=100; - progress1=0; - StrCpy(progress1_desc, "Receiving file"); - while (!load) { - if ((TickTimer-StartTimer)>URIEL_TIMEOUT) { UrielFileBufSize=0; break; }; - if (InU8(0x60)==0x01) { UrielGetFileAborted=1; UrielFileBufSize=0; break; }; - PrevFileBufSize=UrielFileBufSize; - Sleep(0); - while (FifoU8Rem(c->RX_fifo,&b)) { - if (b != 0x00) { - UrielFileBuf[UrielFileBufSize] = b; - UrielFileBufSize++; - progress1++; - if (progress1>progress1_max) { progress1=0; }; - } - else { - if (StrCmp(UrielFileBuf+(UrielFileBufSize-8), URIEL_BINARY_EOF)==0) { - UrielFileBufSize -= 8; - load=TRUE; - UrielGetFileSuccess=1; - break; - } - else { - UrielFileBuf[UrielFileBufSize] = b; - UrielFileBufSize++; - progress1++; - if (progress1>progress1_max) { progress1=0; }; - } - } - } - if (PrevFileBufSize==UrielFileBufSize) { - if (StartTimer==0) { StartTimer=SysTimerRead/1000000; }; - TickTimer=SysTimerRead/1000000; - } - } - FileWrite(local_file, UrielFileBuf, UrielFileBufSize); - ProgressBarsRst; -} +U8 URIEL_USER_AGENT[64]; +StrCpy(URIEL_USER_AGENT, URIEL_VERSION); +StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), " ("); +StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), Define("DD_OS_NAME_VERSION")); +StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), ")"); -U0 Send(U8 *local_file, U8 *remote_file="") { - CDirEntry *d_file = FilesFind(local_file); - U8 *s_file = FileRead(d_file->full_name, d_file->size); - I64 f_pos=0; - CommPutS(URIEL_PORT, URIEL_FCMD_PUT); - CommPutS(URIEL_PORT, local_file); - CommPutS(URIEL_PORT, "|"); - CommPutS(URIEL_PORT, remote_file); - CommPutS(URIEL_PORT, URIEL_BINARY_SOF); - progress1_max=d_file->size; - progress1=0; - StrCpy(progress1_desc, "Sending file"); - for (f_pos=0;f_possize;f_pos++) { - if (InU8(0x60)==0x01) { UrielFileBufSize=0; break; }; - Sleep(0); - progress1=f_pos; - OutU8(URIEL_RAW_PORT, s_file[f_pos]); - } - CommPutS(URIEL_PORT, URIEL_BINARY_EOF); - ProgressBarsRst; - Free(s_file); - Free(d_file); +U0 U_CloseBrowser() +{ + Bool close=FALSE; + close = PopUpCancelOk("Close Uriel Browser?"); + if (close) { In("x\n"); }; } -U0 InsertImg(U8 *img_anchor, U8 *img_sid, U8 *img_eid, U8 *img_url) { - UrielGetFileAborted=0; - UrielGetFileSuccess=0; - DocAnchorFind(DocPut, img_anchor); - Get(img_url, URIEL_TMP_IMG); - while (UrielGetFileAborted==0 && UrielGetFileSuccess==0) { - DocAnchorFind(DocPut, img_anchor); - Get("retry:send", URIEL_TMP_IMG); - } - if (UrielGetFileAborted==0) { - DocAnchorFind(DocPut, img_anchor); - DocBMP(,URIEL_TMP_IMG); - if (StrCmp(img_sid, "")!=0) { - DocAnchorFind(DocPut, img_sid); - DocPrintPartial(DocPut, "$ID,14$"); - } - if (StrCmp(img_eid, "")!=0) { - DocAnchorFind(DocPut, img_eid); - DocPrintPartial(DocPut, "$ID,-14$"); - } - DocAnchorFind(DocPut, img_anchor); - } - UrielGetFileAborted=0; - UrielGetFileSuccess=0; +U0 U_InsertThumb(U8 *anchor, U8 *indent, U8 *outdent, U8 *url) +{ + DocAnchorFind(DocPut,anchor); + I64 size; + ZeroParamBuf; + StrCpy(HGBD_PARAM_BUF,URIEL_USER_AGENT); + WriteParamBuf; + BlkWrite(HGBD,url,1,(StrLen(url)/BLK_SIZE)+1); + HGExec(URIEL_THUMB); + ReadParamBuf; + size = Str2I64(HGBD_PARAM_BUF); + if (size==-1) { + PopUpOk("Error loading image."); + return; + }; + BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1); + FileWrite(URIEL_THUMB_BMP, HGFS_BUF, size); + DocAnchorFind(DocPut,anchor); + DocBMP(,URIEL_THUMB_BMP); + if (StrCmp(indent,"")!=0) { + DocAnchorFind(DocPut,indent); + DocPrintPartial(DocPut,"$ID,14$"); + }; + if (StrCmp(indent,"")!=0) { + DocAnchorFind(DocPut,outdent); + DocPrintPartial(DocPut,"$ID,-14$"); + }; + DocAnchorFind(DocPut,anchor); } -U0 CloseBrowser() { - Bool close=FALSE; - close = PopUpCancelOk("Close Browser?"); - if (close) { In("x\n"); }; +U0 U_Browser(U8 *url) +{ + I64 size; + U8 *cmd; + ZeroParamBuf; + StrCpy(HGBD_PARAM_BUF,URIEL_USER_AGENT); + WriteParamBuf; + BlkWrite(HGBD,url,1,(StrLen(url)/BLK_SIZE)+1); + HGExec(URIEL_GETPAGE); + ReadParamBuf; + size = Str2I64(HGBD_PARAM_BUF); + if (size==-1) { + PopUpOk("Bad URL."); + return; + }; + BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1); + MemSetU8(ToI64(HGFS_BUF)+size, 0x0, 1); + WinMax; + CDoc *page = DocNew(); + DocPrintPartial(page,HGFS_BUF); + DocClear; + DocInsDoc(DocPut,page); + DocTop; + DocDel(page); + while (TRUE) { + cmd = GetStr; + if (StrCmp(cmd,"x")==0) { break; }; + } + Free(cmd); + DocBottom; } -U0 Browser(U8 *url) +U0 U_HistNav(I64 index) { - WinMax; - U8 *Cmd; - UrielPageBufSize=0; - I64 UrielPageBufPos=0; - - CommPutS(URIEL_PORT, url); - CommPutS(URIEL_PORT, "|"); - CommPutS(URIEL_PORT, Define("DD_OS_NAME_VERSION")); - CommPutS(URIEL_PORT, "^"); - - CDoc *UrielPage = DocNew(); - I64 b=0; - Bool load=FALSE; - Bool proc=FALSE; - Bool pad=TRUE; - - progress1_max=100; - progress1=0; - StrCpy(progress1_desc, "Request sent, waiting for response"); - while (!load) { - if (InU8(0x60)==0x01) { UrielPageBufSize=0; break; }; - Sleep(0); - while (FifoU8Rem(c->RX_fifo,&b)) { - if (b == 0xFF && !pad) { load=TRUE; break; }; - if (b != 0xFF && pad) { pad=FALSE; }; - if (b != 0xFF && !pad) { - if (!proc) { proc=TRUE; StrCpy(progress1_desc, "Processing DolDoc"); }; - UrielPageBuf[UrielPageBufSize] = b; - UrielPageBufSize++; - progress1++; - if (progress1>progress1_max) { progress1=0; }; - } - } - } - - while (UrielPageBufPos0) { + StrCpy(localfile+StrLen(localfile), StrLastOcc(url,"/")+1); + } else { + StrCpy(localfile+StrLen(localfile), "Download.OUT"); + }; + ZeroParamBuf; + HGExec(URIEL_DOWNLOAD); + ReadParamBuf; + size = Str2I64(HGBD_PARAM_BUF); + if (size==-1) { + PopUpOk("Bad URL."); + return; + }; + BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1); + if (StrCmp(localfile+StrLen(localfile)-2,".Z")==0) { + U8 uzlocalfile[28]; + MemCpy(uzlocalfile,localfile,StrLen(localfile)-2); + uzlocalfile[StrLen(localfile)-1] = 0x0; + FileWrite(uzlocalfile, HGFS_BUF, size); + Move(uzlocalfile, localfile); + } else { + FileWrite(localfile, HGFS_BUF, size); + }; + PopUpOk("Downloaded file: %s", localfile); } -U0 Navigate(U8 *anchor, U8 *url) { - DocAnchorFind(DocPut, anchor); - Browser(url); +U0 U_Navigate(U8 *anchor, U8 *url) { + DocAnchorFind(DocPut,anchor); + U_Browser(url); } diff --git a/uriel.py b/uriel.py new file mode 100644 index 0000000..2092e9e --- /dev/null +++ b/uriel.py @@ -0,0 +1,432 @@ +import bs4 +import re + +URIEL_GETPAGE = 0x10 +URIEL_NAVBACK = 0x11 +URIEL_NAVFWD = 0x12 +URIEL_THUMB = 0x13 +URIEL_DOWNLOAD = 0x14 + +class Uriel: + download_buffer = '' + user_agent = '' + history = [] + nav_index = -1 + class rel: + scheme = '' + netloc = '' + path = '' + +def uriel(data): + if data == URIEL_GETPAGE: + UrielGetPage() + if data == URIEL_NAVBACK: + UrielNavBack() + if data == URIEL_NAVFWD: + UrielNavFwd() + if data == URIEL_THUMB: + UrielThumb() + if data == URIEL_DOWNLOAD: + UrielDownload() + +def UrielGetPage(): + global Uriel + os.lseek(HGBD,0,os.SEEK_SET) + HGBD_PARAM_BUF = os.read(HGBD,BLK_SIZE) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + HGBD_URL_BUF = os.read(HGBD,BLK_SIZE*4) + if Uriel.user_agent == '': + Uriel.user_agent = HGBD_PARAM_BUF[:HGBD_PARAM_BUF.find('\x00')] + url_comp = urlparse.urlparse(HGBD_URL_BUF[:HGBD_URL_BUF.find('\x00')]) + scheme = '' + netloc = '' + path = '' + if url_comp.scheme == '': + scheme = Uriel.rel.scheme + else: + scheme = url_comp.scheme + Uriel.rel.scheme = url_comp.scheme + if url_comp.netloc == '': + netloc = Uriel.rel.netloc + else: + netloc = url_comp.netloc + Uriel.rel.netloc = url_comp.netloc + if url_comp.path != '': + if url_comp.path.find('/') != -1: + path = url_comp.path + Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1] + else: + path = Uriel.rel.path + url_comp.path + url = scheme + "://" + netloc + urllib.quote(path) + pagedata = subprocess.Popen('wget -q -O - -U "' + Uriel.user_agent + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] + filedata = UrielPreProcess(pagedata, url) + filesize = len(filedata) + if filesize>0: + if filedata.find('$AN,"",A="BINARY"$') != -1: + Uriel.download_buffer = pagedata + Uriel.nav_index += 1 + Uriel.history = Uriel.history[0:Uriel.nav_index] + Uriel.history.append({'url':url, 'filedata':filedata}) + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + os.write(HGBD,filedata) + logger.info("[Uriel] navigate to " + url) + else: + filesize = -1 + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + logger.error("[Uriel] error reading url " + url) + conn.send(chr(URIEL_GETPAGE)) + +def UrielNavBack(): + global Uriel + if Uriel.nav_index > 0: + Uriel.nav_index -= 1 + url_comp = urlparse.urlparse(Uriel.history[Uriel.nav_index]['url']) + filedata = Uriel.history[Uriel.nav_index]['filedata'] + scheme = '' + netloc = '' + path = '' + if url_comp.scheme == '': + scheme = Uriel.rel.scheme + else: + scheme = url_comp.scheme + if url_comp.netloc == '': + netloc = Uriel.rel.netloc + else: + netloc = url_comp.netloc + if url_comp.path != '': + if url_comp.path.find('/') != -1: + path = url_comp.path + else: + path = Uriel.rel.path + url_comp.path + url = scheme + "://" + netloc + urllib.quote(path) + filesize = len(filedata) + if filesize>0: + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + os.write(HGBD,filedata) + logger.info("[Uriel] history navigate back to " + url) + else: + filesize = -1 + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + logger.error("[Uriel] error reading history for url " + url) + conn.send(chr(URIEL_NAVBACK)) + +def UrielNavFwd(): + global Uriel + if Uriel.nav_index < len(Uriel.history)-1: + Uriel.nav_index += 1 + url_comp = urlparse.urlparse(Uriel.history[Uriel.nav_index]['url']) + filedata = Uriel.history[Uriel.nav_index]['filedata'] + scheme = '' + netloc = '' + path = '' + if url_comp.scheme == '': + scheme = Uriel.rel.scheme + else: + scheme = url_comp.scheme + if url_comp.netloc == '': + netloc = Uriel.rel.netloc + else: + netloc = url_comp.netloc + if url_comp.path != '': + if url_comp.path.find('/') != -1: + path = url_comp.path + else: + path = Uriel.rel.path + url_comp.path + url = scheme + "://" + netloc + urllib.quote(path) + filesize = len(filedata) + if filesize>0: + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + os.write(HGBD,filedata) + logger.info("[Uriel] history navigate fwd to " + url) + else: + filesize = -1 + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + logger.error("[Uriel] error reading history for url " + url) + conn.send(chr(URIEL_NAVFWD)) + +def UrielThumb(): + global Uriel + os.lseek(HGBD,0,os.SEEK_SET) + HGBD_PARAM_BUF = os.read(HGBD,BLK_SIZE) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + HGBD_URL_BUF = os.read(HGBD,BLK_SIZE*4) + if Uriel.user_agent == '': + Uriel.user_agent = HGBD_PARAM_BUF[:HGBD_PARAM_BUF.find('\x00')] + url_comp = urlparse.urlparse(HGBD_URL_BUF[:HGBD_URL_BUF.find('\x00')]) + scheme = '' + netloc = '' + path = '' + if url_comp.scheme == '': + scheme = Uriel.rel.scheme + else: + scheme = url_comp.scheme + if url_comp.netloc == '': + netloc = Uriel.rel.netloc + else: + netloc = url_comp.netloc + if url_comp.path != '': + if url_comp.path.find('/') != -1: + path = url_comp.path + else: + path = Uriel.rel.path + url_comp.path + url = scheme + "://" + netloc + urllib.quote(path) + tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp' + while os.path.exists(tmp_thumb): + tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp' + pagedata = subprocess.Popen('wget -q -O - -U "' + Uriel.user_agent + '" "' + url + '" 2>/dev/null | gm convert -resize 100x100 - -colors 16 "' + tmp_thumb + '"', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] + filedata = open(tmp_thumb,"rb").read() + try: + os.remove(tmp_thumb) + except: + pass + filesize = len(filedata) + if filesize>0: + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + os.write(HGBD,filedata) + logger.info("[Uriel] get image thumbnail " + url) + else: + filesize = -1 + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + logger.error("[Uriel] error reading url " + url) + conn.send(chr(URIEL_THUMB)) + +def UrielDownload(): + global Uriel + filedata = Uriel.download_buffer + filesize = len(filedata) + if filesize>0: + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + os.lseek(HGBD,BLK_SIZE,os.SEEK_SET) + os.write(HGBD,filedata) + logger.info("[Uriel] binary, download file") + else: + filesize = -1 + ZeroParamBuf() + os.lseek(HGBD,0,os.SEEK_SET) + os.write(HGBD,str(filesize)) + logger.error("[Uriel] error downloading file") + conn.send(chr(URIEL_DOWNLOAD)) + +def UrielPreProcess(htm1, l_url): + title_text = '' + hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="U_CloseBrowser;"$ $MA+LIS,"[Back]",LM="U_HistNav(0);"$ $MA+LIS,"[Fwd]",LM="U_HistNav(1);"$ $MA+LIS,"[Go]",LM="U_Browser(GetStr(\\"\nURL> \\"));"$ ' + title_text + '\n\n' + + if htm1.upper().find('',' ') + htm1 = htm1.replace('
',' ') + + htm1 = htm1.replace('
','\n') + htm1 = htm1.replace('
','\n') + htm1 = htm1.replace('
','\n') + htm1 = htm1.replace('
','\n') + htm1 = htm1.replace('
','\n') + htm1 = htm1.replace('
','\n') + + htm1 = htm1.replace('
  • ',' * ') + htm1 = htm1.replace('
  • ',' * ') + + htm1 = htm1.replace('','') + htm1 = htm1.replace('','') + + title_text = '' + a_pos = htm1.upper().find('') + if a_pos != -1: + title_text = htm1[a_pos:htm1.find('</', a_pos)].split('>')[1] + + soup1 = bs4.BeautifulSoup(htm1, 'lxml') + + unwrap_tags = [ 'html', 'body', 'p', 'b', 'pre', 'span', 'table', 'header' ] + for tag in unwrap_tags: + for match in soup1.findAll(tag): + match.unwrap() + + for f in soup1.findAll('a'): + for tag in f.findAll(True): + if str(tag).find('<None>') == -1: + if tag.name.upper() != 'IMG': + tag.decompose() + + remove_tags = [ 'svg', 'embed', 'head', 'noscript', 'object', 'param', 'script', 'option' ] + for tag in remove_tags: + [s.extract() for s in soup1(tag)] + + html = str(soup1) + + html = html.replace('<h1>','$PURPLE$') + html = html.replace('<H1>','$PURPLE$') + html = html.replace('</h1>','$BLACK$') + html = html.replace('</H1>','$BLACK$') + + html = html.replace('<u>','$UL,1$') + html = html.replace('<U>','$UL,1$') + html = html.replace('</u>','$UL,0$') + html = html.replace('</U>','$UL,0$') + + html = html.replace('<b>','$IV,1$') + html = html.replace('<B>','$IV,1$') + html = html.replace('</b>','$IV,0$') + html = html.replace('</B>','$IV,0$') + + a_pos = html.upper().find('<IMG ') + while a_pos != -1: + img_text = html[a_pos:].split('>')[0] + img_text.replace("'",'"') + img_src = '' + img_pos = img_text.upper().find('SRC') + if img_pos > 0: + img_src = img_text[img_text.upper().find('SRC'):].split('"')[1] + img_el = '[URIEL_IMG]' + img_src + '[/URIEL_IMG]' + html = html[:a_pos] + img_el + html[1+html.upper().find('>', a_pos):] + a_pos = html.upper().find('<IMG ') + + a_pos = html.upper().find('<BUTTON ') + while a_pos != -1: + button_text = html[a_pos:].split('>')[1] + button_text = button_text[:button_text.upper().find('</BUTTON')] + button_text = button_text.replace('"','\\"') + button_doctext = '$BT,"' + button_text + '"$' + html = html[:a_pos] + button_doctext + html[9+html.upper().find('</BUTTON>', a_pos):] + a_pos = html.upper().find('<BUTTON ') + + a_ctr = 0 + a_pos = html.upper().find('<A ') + while a_pos != -1: + link_pre = '' + link_text = html[a_pos:].split('>')[1] + link_text = link_text[:link_text.upper().find('</A')] + while link_text.find('[URIEL_IMG]') != -1: + link_pre += link_text[link_text.find('[URIEL_IMG]'):12+link_text.find('[/URIEL_IMG]')] + ' ' + link_text = link_text[:link_text.find('[URIEL_IMG]')] + link_text[12+link_text.find('[/URIEL_IMG]'):] + link_text = link_text.replace('"','\\"') + link_href = '' + link_pos = html[a_pos:html.upper().find('</A>', a_pos)].upper().find('HREF') + if link_pos > 0: + link_href = html[a_pos:html.upper().find('</A>', a_pos)][link_pos:].replace('\'','"').split('"')[1] + doldoc_link = '$AN,"",A="A' + str(a_ctr) + '"$$MA+LIS,"' + link_text + '",LM="U_Navigate(\\"A' + str(a_ctr) + '\\",\\"' + link_href + '\\");"$' + html = html[:a_pos] + link_pre + doldoc_link + html[4+html.upper().find('</A>', a_pos):] + a_ctr += 1 + a_pos = html.upper().find('<A ') + + a_pos = html.upper().find('<CENTER>') + while a_pos != -1: + center_text = html[a_pos:].split('>')[1] + center_text = center_text[:center_text.upper().find('</CENTER')] + center_text = center_text.replace('"','\\"') + if center_text.upper().find('[URIEL_IMG]') != -1: + center_doctext = center_text + else: + center_doctext = '$TX+CX,"' + center_text + '"$' + html = html[:a_pos] + center_doctext + html[9+html.upper().find('</CENTER>', a_pos):] + a_pos = html.upper().find('<CENTER>') + + html = html.replace('</div>','\n') + html = html.replace('</DIV>','\n') + + html = html.replace('</td>', ' ') + html = html.replace('</TD>', ' ') + + html = html.replace('</tr>', '\n') + html = html.replace('</TR>', '\n') + + a_pos = html.upper().find('<INPUT ') + while a_pos != -1: + input_text = html[a_pos:].split('>')[0] + input_text = input_text.replace("'", '"') + input_doctext = '[$UL,1$ $UL,0$]' + + t_text = '' + if input_text.upper().find('VALUE='): + t_t = input_text[input_text.upper().find('VALUE='):].split('"') + if len(t_t) > 2: + t_text = t_t[1] + + bt_text = t_text if t_text != '' else 'Button' + st_text = t_text if t_text != '' else 'Submit' + + if input_text.find('button') != -1: + input_doctext = '$BT,"' + bt_text + '"$' + if input_text.find('checkbox') != -1: + input_doctext = '$CB$' + if input_text.find('hidden') != -1: + input_doctext = '' + if input_text.find('submit') != -1: + input_doctext = '$BT,"' + st_text + '"$' + + html = html[:a_pos] + input_doctext + html[1+html.upper().find('>', a_pos):] + a_pos = html.upper().find('<INPUT ') + + a_pos = html.upper().find('<') + while a_pos != -1: + html = html[:a_pos] + html[1+html.upper().find('>', a_pos):] + a_pos = html.upper().find('<') + + html = html.replace('<','<') + html = html.replace('>','>') + html = html.replace('&','&') + html = html.replace(''','\'') + html = html.replace('"','"') + + img_a_ctr = 0 + while html.find('[URIEL_IMG]') != -1: + img_url = html[11+html.find('[URIEL_IMG]'):html.find('[/URIEL_IMG]')] + img_ma = '$AN,"",A="IMG' + str(img_a_ctr) + '"$$MA+LIS,"[IMG]",LM="U_InsertThumb(\\"IMG' + str(img_a_ctr) + '\\",\\"$IMIS$\\",\\"$IMIE$\\",\\"' + img_url + '\\");"$$AN,"",A="IMIS' + str(img_a_ctr) + '"$' + html = html[:html.find('[URIEL_IMG]')] + img_ma + html[12+html.find('[/URIEL_IMG]'):] + img_a_ctr += 1 + + hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="U_CloseBrowser;"$ $MA+LIS,"[Back]",LM="U_HistNav(0);"$ $MA+LIS,"[Fwd]",LM="U_HistNav(1);"$ $MA+LIS,"[Go]",LM="U_Browser(GetStr(\\"URL> \\"));"$ ' + title_text + '\n\n' + + ind_id = '' + o_html = '' + o_lj_ct = 0 + o_lj_indent = False + + for line in html.split('\n'): + if not o_lj_indent: + if line[0:13] == '$AN,"",A="IMG': + # Left Justified image detected. + ind_id = line.split('IMG')[1].split('"')[0] + line = line.replace('$IMIS$','IMIS' + ind_id) + line = line.replace('$IMIE$','IMIE' + ind_id) + o_lj_indent = True + + if o_lj_indent: + o_lj_ct += 1 + if o_lj_ct > 11: + line = '$AN,"",A="IMIE' + ind_id + '"$' + line + ind_id = '' + o_lj_ct = 0 + o_lj_indent = False + + line = line.replace('$IMIS$','') + line = line.replace('$IMIE$','') + o_html += line + '\n' + + return hb_header + o_html diff --git a/uriel_preprocessor.py b/uriel_preprocessor.py deleted file mode 100755 index f98ec83..0000000 --- a/uriel_preprocessor.py +++ /dev/null @@ -1,206 +0,0 @@ -import bs4 -import re, sys - -def preprocess(htm1, l_url): - title_text = '' - hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="CloseBrowser;"$ $MA+LIS,"[Back]",LM="Browser(\\"h:back\\");"$ $MA+LIS,"[Fwd]",LM="Browser(\\"h:fwd\\");"$ $MA+LIS,"[Go]",LM="Browser(GetStr(\\"\nURL> \\"));"$ ' + title_text + '\n\n' - - if htm1.upper().find('<HTML') == -1: - dl_link = '$AN,"",A="BINARY"$Click $MA+LIS,"[Here]",LM="Get(\\"retry:send\\",\\"~/Downloads/' + l_url.split('/')[len(l_url.split('/'))-1] + '\\");"$ to download the file:\n\n' + l_url - if l_url.split('.')[len(l_url.split('.'))-1].upper() == 'Z': - dl_link = dl_link.replace('retry:send', 'retry:sendZ') - return hb_header + dl_link - - htm1 = htm1[htm1.upper().find('<HTML'):] - htm1 = htm1.replace('$', '$$') - - htm1 = htm1.replace('<blockquote>',' ') - htm1 = htm1.replace('<BLOCKQUOTE>',' ') - - htm1 = htm1.replace('<br>','\n') - htm1 = htm1.replace('<br/>','\n') - htm1 = htm1.replace('<br />','\n') - htm1 = htm1.replace('<BR>','\n') - htm1 = htm1.replace('<BR/>','\n') - htm1 = htm1.replace('<BR />','\n') - - htm1 = htm1.replace('<li>',' * ') - htm1 = htm1.replace('<LI>',' * ') - - htm1 = htm1.replace('</img>','') - htm1 = htm1.replace('</IMG>','') - - title_text = '' - a_pos = htm1.upper().find('<TITLE>') - if a_pos != -1: - title_text = htm1[a_pos:htm1.find('</', a_pos)].split('>')[1] - - soup1 = bs4.BeautifulSoup(htm1, 'lxml') - - unwrap_tags = [ 'html', 'body', 'p', 'b', 'pre', 'span', 'table', 'header' ] - for tag in unwrap_tags: - for match in soup1.findAll(tag): - match.unwrap() - - for f in soup1.findAll('a'): - for tag in f.findAll(True): - if str(tag).find('<None>') == -1: - if tag.name.upper() != 'IMG': - tag.decompose() - - remove_tags = [ 'svg', 'embed', 'head', 'noscript', 'object', 'param', 'script', 'option' ] - for tag in remove_tags: - [s.extract() for s in soup1(tag)] - - html = str(soup1) - - html = html.replace('<h1>','$PURPLE$') - html = html.replace('<H1>','$PURPLE$') - html = html.replace('</h1>','$BLACK$') - html = html.replace('</H1>','$BLACK$') - - html = html.replace('<u>','$UL,1$') - html = html.replace('<U>','$UL,1$') - html = html.replace('</u>','$UL,0$') - html = html.replace('</U>','$UL,0$') - - html = html.replace('<b>','$IV,1$') - html = html.replace('<B>','$IV,1$') - html = html.replace('</b>','$IV,0$') - html = html.replace('</B>','$IV,0$') - - a_pos = html.upper().find('<IMG ') - while a_pos != -1: - img_text = html[a_pos:].split('>')[0] - img_text.replace("'",'"') - img_src = '' - img_pos = img_text.upper().find('SRC') - if img_pos > 0: - img_src = img_text[img_text.upper().find('SRC'):].split('"')[1] - img_el = '[URIEL_IMG]' + img_src + '[/URIEL_IMG]' - html = html[:a_pos] + img_el + html[1+html.upper().find('>', a_pos):] - a_pos = html.upper().find('<IMG ') - - a_pos = html.upper().find('<BUTTON ') - while a_pos != -1: - button_text = html[a_pos:].split('>')[1] - button_text = button_text[:button_text.upper().find('</BUTTON')] - button_text = button_text.replace('"','\\"') - button_doctext = '$BT,"' + button_text + '"$' - html = html[:a_pos] + button_doctext + html[9+html.upper().find('</BUTTON>', a_pos):] - a_pos = html.upper().find('<BUTTON ') - - a_ctr = 0 - a_pos = html.upper().find('<A ') - while a_pos != -1: - link_pre = '' - link_text = html[a_pos:].split('>')[1] - link_text = link_text[:link_text.upper().find('</A')] - while link_text.find('[URIEL_IMG]') != -1: - link_pre += link_text[link_text.find('[URIEL_IMG]'):12+link_text.find('[/URIEL_IMG]')] + ' ' - link_text = link_text[:link_text.find('[URIEL_IMG]')] + link_text[12+link_text.find('[/URIEL_IMG]'):] - link_text = link_text.replace('"','\\"') - link_href = '' - link_pos = html[a_pos:html.upper().find('</A>', a_pos)].upper().find('HREF') - if link_pos > 0: - link_href = html[a_pos:html.upper().find('</A>', a_pos)][link_pos:].replace('\'','"').split('"')[1] - doldoc_link = '$AN,"",A="A' + str(a_ctr) + '"$$MA+LIS,"' + link_text + '",LM="Navigate(\\"A' + str(a_ctr) + '\\",\\"' + link_href + '\\");"$' - html = html[:a_pos] + link_pre + doldoc_link + html[4+html.upper().find('</A>', a_pos):] - a_ctr += 1 - a_pos = html.upper().find('<A ') - - a_pos = html.upper().find('<CENTER>') - while a_pos != -1: - center_text = html[a_pos:].split('>')[1] - center_text = center_text[:center_text.upper().find('</CENTER')] - center_text = center_text.replace('"','\\"') - if center_text.upper().find('[URIEL_IMG]') != -1: - center_doctext = center_text - else: - center_doctext = '$TX+CX,"' + center_text + '"$' - html = html[:a_pos] + center_doctext + html[9+html.upper().find('</CENTER>', a_pos):] - a_pos = html.upper().find('<CENTER>') - - html = html.replace('</div>','\n') - html = html.replace('</DIV>','\n') - - html = html.replace('</td>', ' ') - html = html.replace('</TD>', ' ') - - html = html.replace('</tr>', '\n') - html = html.replace('</TR>', '\n') - - a_pos = html.upper().find('<INPUT ') - while a_pos != -1: - input_text = html[a_pos:].split('>')[0] - input_text = input_text.replace("'", '"') - input_doctext = '[$UL,1$ $UL,0$]' - - t_text = '' - if input_text.upper().find('VALUE='): - t_t = input_text[input_text.upper().find('VALUE='):].split('"') - if len(t_t) > 2: - t_text = t_t[1] - - bt_text = t_text if t_text != '' else 'Button' - st_text = t_text if t_text != '' else 'Submit' - - if input_text.find('button') != -1: - input_doctext = '$BT,"' + bt_text + '"$' - if input_text.find('checkbox') != -1: - input_doctext = '$CB$' - if input_text.find('hidden') != -1: - input_doctext = '' - if input_text.find('submit') != -1: - input_doctext = '$BT,"' + st_text + '"$' - - html = html[:a_pos] + input_doctext + html[1+html.upper().find('>', a_pos):] - a_pos = html.upper().find('<INPUT ') - - a_pos = html.upper().find('<') - while a_pos != -1: - html = html[:a_pos] + html[1+html.upper().find('>', a_pos):] - a_pos = html.upper().find('<') - - html = html.replace('<','<') - html = html.replace('>','>') - html = html.replace('&','&') - html = html.replace(''','\'') - html = html.replace('"','"') - - img_a_ctr = 0 - while html.find('[URIEL_IMG]') != -1: - img_url = html[11+html.find('[URIEL_IMG]'):html.find('[/URIEL_IMG]')] - img_ma = '$AN,"",A="IMG' + str(img_a_ctr) + '"$$MA+LIS,"[IMG]",LM="InsertImg(\\"IMG' + str(img_a_ctr) + '\\",\\"$IMIS$\\",\\"$IMIE$\\",\\"' + img_url + '.uriel_img\\");"$$AN,"",A="IMIS' + str(img_a_ctr) + '"$' - html = html[:html.find('[URIEL_IMG]')] + img_ma + html[12+html.find('[/URIEL_IMG]'):] - img_a_ctr += 1 - - hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="CloseBrowser;"$ $MA+LIS,"[Back]",LM="Browser(\\"h:back\\");"$ $MA+LIS,"[Fwd]",LM="Browser(\\"h:fwd\\");"$ $MA+LIS,"[Go]",LM="Browser(GetStr(\\"URL> \\"));"$ ' + title_text + '\n\n' - - ind_id = '' - o_html = '' - o_lj_ct = 0 - o_lj_indent = False - - for line in html.split('\n'): - if not o_lj_indent: - if line[0:13] == '$AN,"",A="IMG': - # Left Justified image detected. - ind_id = line.split('IMG')[1].split('"')[0] - line = line.replace('$IMIS$','IMIS' + ind_id) - line = line.replace('$IMIE$','IMIE' + ind_id) - o_lj_indent = True - - if o_lj_indent: - o_lj_ct += 1 - if o_lj_ct > 11: - line = '$AN,"",A="IMIE' + ind_id + '"$' + line - ind_id = '' - o_lj_ct = 0 - o_lj_indent = False - - line = line.replace('$IMIS$','') - line = line.replace('$IMIE$','') - o_html += line + '\n' - - return hb_header + o_html diff --git a/uriel_proxy b/uriel_proxy deleted file mode 100755 index 615d71b..0000000 --- a/uriel_proxy +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/python -from uriel_preprocessor import preprocess -import os, socket, subprocess, sys, time, urlparse, uuid - -HOST = '127.0.0.1' -PORT = 7202 - -DELIM_BIN_EOF = '*[U_EOF]' -DELIM_BIN_GET = '*[U_GET]' -DELIM_BIN_PUT = '*[U_PUT]' -DELIM_BIN_SOF = '*[U_SOF]' - -MODE_LISTEN = 0 -MODE_PUT_START = 1 -MODE_GET_START = 2 - -URIEL_VER_STR = 'Uriel/0.1' - -blk_size = 8 -delay_ms = .001 - -s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -try: - s.bind((HOST, PORT)) -except socket.error as msg: - sys.stdout.write('Error: failed to open socket\n') - sys.stdout.flush() - sys.exit() -s.listen(0) -sys.stdout.write('[uriel_proxy started]\n') -sys.stdout.flush() - -while 1: - conn, addr = s.accept() - last_buf = '' - cmd_in = '' - rel_url = '' - history = [] - hst_index = -1 - state = MODE_LISTEN - while 1: - if 1==1: - data = conn.recv(1024) - cmd_in += data - - if state == MODE_LISTEN: - if cmd_in.find(DELIM_BIN_GET) != -1: - state = MODE_GET_START - if state == MODE_GET_START: - if cmd_in.find('^') != -1: - get_file = cmd_in[cmd_in.find(DELIM_BIN_GET)+8:cmd_in.find('|')] - blk_ctr = 0 - if get_file[0:2] == '//': - get_file = 'http:' + get_file - if get_file.find('://') != -1: - headers = { 'User-Agent': URIEL_VER_STR + ' (' + cmd_in.split('^')[0].rsplit('|')[1] + ')' } - if get_file.find('.uriel_img') != -1: - tmp_img_file = '/tmp/' + str(uuid.uuid4()) + '.bmp' - while os.path.exists(tmp_img_file): - tmp_img_file = '/tmp/' + str(uuid.uuid4()) + '.bmp' - file = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + get_file.split('.uriel_img')[0] + '" 2>/dev/null | gm convert -resize 100x100 - -colors 16 "' + tmp_img_file + '"', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] - file = open(tmp_img_file, "rb").read() - os.remove(tmp_img_file) - else: - file = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + get_file + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] - else: - file = '' - if get_file == 'retry:send': - file = last_buf - if get_file == 'retry:sendZ': - tmp_z_file = '/tmp/' + str(uuid.uuid4()) + '.Z' - while os.path.exists(tmp_z_file): - tmp_z_file = '/tmp/' + str(uuid.uuid4()) + '.Z' - open(tmp_z_file, "wb").write(last_buf) - z = subprocess.Popen('tosz "' + tmp_z_file + '"', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] - file = open(tmp_z_file.split('.Z')[0], "rb").read() - os.remove(tmp_z_file.split('.Z')[0]) - if file == '': - file = open(get_file, "rb").read() - while blk_ctr < len(file): - conn.sendall(file[blk_ctr:blk_ctr+blk_size]) - blk_ctr += blk_size - time.sleep(delay_ms) - conn.sendall(DELIM_BIN_EOF+'\x00') - last_buf = file - cmd_in = "" - state = MODE_LISTEN - - if state == MODE_LISTEN: - if cmd_in.find(DELIM_BIN_PUT) != -1: - state = MODE_PUT_START - if state == MODE_PUT_START: - if cmd_in.find(DELIM_BIN_EOF) != -1: - put_files = cmd_in[cmd_in.find(DELIM_BIN_PUT)+8:cmd_in.find(DELIM_BIN_SOF)].split('|') - s_filename = put_files[0] - r_filename = put_files[1] - if r_filename == "": - r_filename = 'Xfer/' + s_filename.split('/')[len(s_filename.split('/'))-1] - open(r_filename,"wb").write(cmd_in[cmd_in.find(DELIM_BIN_SOF)+8:cmd_in.find(DELIM_BIN_EOF)]) - cmd_in = "" - state = MODE_LISTEN - - if cmd_in.find('^') != -1 and state == MODE_LISTEN: - user_agent = URIEL_VER_STR + ' (' + cmd_in.split('^')[0].rsplit('|')[1] + ')' - url = cmd_in.split('^')[0].rsplit('|')[0] - url = url.split('#')[0] - prot_ag_url = False - if url.lower()[0:4] != 'http': - if url[0:2] == '//': - url = 'http:' + url - prot_ag_url = True - if url.find('/') != -1 and not prot_ag_url: - if url.split('/')[0].find('.') != -1: - url = 'http://' + url - page_int = 0 - - if url == 'h:back': - if hst_index > 0: - hst_index -= 1 - url = history[hst_index]['url'] - page = history[hst_index]['page'] - page_int = 1 - if url == 'h:fwd': - if hst_index < len(history)-1: - hst_index += 1 - url = history[hst_index]['url'] - page = history[hst_index]['page'] - page_int = 1 - - if page_int == 0: - if url.find('://') == -1: - url = rel_url + url - url = url[:url.find('//')] + '//' + url[url.find('//')+2:].replace('//','/') - headers = { 'User-Agent': user_agent } - data = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] - page = preprocess(data, url) - if page.find('$AN,"",A="BINARY"$') != -1: - last_buf = data - hst_index += 1 - history = history[0:hst_index] - history.append({'url':url, 'page':page}) - - u_page = page - page = '' - u_idx = 0 - while u_idx < len(u_page): - if ord(u_page[u_idx:u_idx+1]) < 127: - page += u_page[u_idx:u_idx+1] - u_idx += 1 - - blk_ctr = 0 - while blk_ctr < len(page): - conn.sendall(page[blk_ctr:blk_ctr+blk_size]) - blk_ctr += blk_size - time.sleep(delay_ms) - - conn.sendall('\xFF') - if url.find('://') != -1: - r_url = urlparse.urlparse(url) - rel_url = r_url[0] + '://' + r_url[1] - r_path = '/' - if r_url[2] != '': - if r_url[2][r_url[2].rfind('/'):].find('.') != -1: - r_path = r_url[2][:r_url[2].rfind('/')] + r_path - else: - r_path = r_url[2] + r_path - rel_url = rel_url + r_path - i_page = '' - page = '' - cmd_in = '' - url = '' - - conn.close() - -s.close() -