Browse Source

Convert to HGBD module, updated README.

master
Alec Murphy 5 years ago
parent
commit
06a0e51ca0
  1. 33
      README.md
  2. 308
      Uriel.HC
  3. 432
      uriel.py
  4. 206
      uriel_preprocessor.py
  5. 176
      uriel_proxy

33
README.md

@ -1,39 +1,28 @@
# uriel
Uriel Web Browser & IRC Client for TempleOS
Uriel Web Browser & (not-yet) IRC Client for TempleOS
This is a proof-of-concept web browser & (soon) IRC client for TempleOS.
This is a proof-of-concept web browser & (soon) IRC client for TempleOS.
It is still in its very early stages.
Start the proxy on the Host:
Add the following to your `/etc/hgbdd.conf`:
```
./uriel_proxy
modules: {
"uriel":"/path/to/uriel.py"
}
```
and bind a chardev in QEMU (or serial2 in VMware/VirtualBox) to TCP 127.0.0.1:7202
In your `HomeKeyPlugIns.HC` or other startup script:
```
#include "Uriel"
#include "HGBD";
#include "Uriel";
```
You can launch a Browser in the current Task with:
```
Browser(url);
```
Get files with:
```
Get(url or host_path, local_path);
```
or Send files with:
```
Send(local_path, host_path);
U_Browser(url);
```
# Prerequisites
- wget
- [HGBD](https://github.com/tramplersheikhs/hgbd)
- Beautiful Soup 4 (for DolDoc preprocessing)
- GraphicsMagick (for inline images)
- TOSZ (to download .Z files)

308
Uriel.HC

@ -1,186 +1,150 @@
// Top-level constants and globals for Uriel.
// NOTE(review): this span appears to interleave declarations from two
// revisions (a serial-port transport and an HGBD transport) — confirm which
// lines belong to the current file.

// Text markers exchanged with the host over the serial link by Get()/Send().
#define URIEL_BINARY_EOF "*[U_EOF]"
#define URIEL_BINARY_SOF "*[U_SOF]"
#define URIEL_FCMD_GET "*[U_GET]"
#define URIEL_FCMD_PUT "*[U_PUT]"
// File that Get() writes a fetched image to before InsertImg() embeds it.
#define URIEL_TMP_IMG "/Tmp/uriel_img.bmp"
// Port number handed to the Comm* calls, and the raw I/O port Send() writes
// bytes to with OutU8 (presumably the same UART — TODO confirm).
#define URIEL_PORT 2
#define URIEL_RAW_PORT 0x02F8
// Idle limit compared against SysTimerRead/1000000 deltas in Get().
#define URIEL_TIMEOUT 1
#include "::/Doc/Comm"
// Command codes passed to HGExec(); uriel.py defines the same values.
#define URIEL_GETPAGE 0x10
#define URIEL_NAVBACK 0x11
#define URIEL_NAVFWD 0x12
#define URIEL_THUMB 0x13
#define URIEL_DOWNLOAD 0x14
// Receive buffers (2 MiB page, 10 MiB file) and their fill levels.
U8 UrielPageBuf[2097152];
U8 UrielFileBuf[10485760];
I64 UrielFileBufSize=0;
I64 UrielPageBufSize=0;
// Result flags set by Get() and polled by InsertImg().
I64 UrielGetFileAborted=0;
I64 UrielGetFileSuccess=0;
// Serial port handle whose RX_fifo is drained by Get() and Browser().
CComm *c=CommInit8n1(URIEL_PORT,115200);
// Prefix U_Download() prepends to the local file name.
#define URIEL_DL_PATH "::/Home/Downloads/"
// File U_InsertThumb() writes the received thumbnail to.
#define URIEL_THUMB_BMP "/Tmp/UrielThumb.bmp"
// First component of the user-agent string built into URIEL_USER_AGENT.
#define URIEL_VERSION "Uriel/0.2"
// Request remote_file from the host proxy over the serial link and write the
// received bytes to local_file.
//
// The request is "*[U_GET]<remote_file>|<os name/version>^". The reply is a
// raw byte stream terminated by URIEL_BINARY_EOF followed by a 0x00 byte.
//
// On return:
//   UrielGetFileSuccess is 1 when the EOF marker was seen;
//   UrielGetFileAborted is 1 when port 0x60 returned 0x01 during the
//   transfer (presumably the Esc key scan code — TODO confirm);
//   otherwise both stay 0 (idle timeout) and an empty file is written.
U0 Get(U8 *remote_file, U8 *local_file="file") {
  UrielGetFileAborted=0;
  UrielGetFileSuccess=0;
  Bool load=FALSE;
  I64 b=0;
  I64 PrevFileBufSize=0;
  I64 StartTimer=0;
  I64 TickTimer=0;
  UrielFileBufSize=0;
  CommPutS(URIEL_PORT, URIEL_FCMD_GET);
  CommPutS(URIEL_PORT, remote_file);
  CommPutS(URIEL_PORT, "|");
  CommPutS(URIEL_PORT, Define("DD_OS_NAME_VERSION"));
  CommPutS(URIEL_PORT, "^");
  progress1_max=100;
  progress1=0;
  StrCpy(progress1_desc, "Receiving file");
  while (!load) {
    // Give up once no new data has arrived for longer than URIEL_TIMEOUT.
    if ((TickTimer-StartTimer)>URIEL_TIMEOUT) { UrielFileBufSize=0; break; };
    if (InU8(0x60)==0x01) { UrielGetFileAborted=1; UrielFileBufSize=0; break; };
    PrevFileBufSize=UrielFileBufSize;
    Sleep(0);
    while (FifoU8Rem(c->RX_fifo,&b)) {
      // A 0x00 byte may be the terminator that follows the EOF marker.
      // Only look back when at least 8 bytes exist, and NUL-terminate the
      // buffer first: StrCmp needs the terminator, and the slot may still
      // hold a stale byte from an earlier, larger transfer.
      if (b==0x00 && UrielFileBufSize>=8) {
        UrielFileBuf[UrielFileBufSize] = 0;
        if (StrCmp(UrielFileBuf+(UrielFileBufSize-8), URIEL_BINARY_EOF)==0) {
          UrielFileBufSize -= 8;
          load=TRUE;
          UrielGetFileSuccess=1;
          break;
        }
      }
      // Ordinary payload byte (including a 0x00 that is part of the data).
      UrielFileBuf[UrielFileBufSize] = b;
      UrielFileBufSize++;
      progress1++;
      if (progress1>progress1_max) { progress1=0; };
    }
    // Nothing arrived this pass: start (or advance) the idle timer.
    if (PrevFileBufSize==UrielFileBufSize) {
      if (StartTimer==0) { StartTimer=SysTimerRead/1000000; };
      TickTimer=SysTimerRead/1000000;
    }
  }
  FileWrite(local_file, UrielFileBuf, UrielFileBufSize);
  ProgressBarsRst;
}
// Build the user-agent string "<URIEL_VERSION> (<DD_OS_NAME_VERSION>)" once at
// load time; each StrCpy appends at the current end of the 64-byte buffer.
U8 URIEL_USER_AGENT[64];
StrCpy(URIEL_USER_AGENT, URIEL_VERSION);
StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), " (");
StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), Define("DD_OS_NAME_VERSION"));
StrCpy(URIEL_USER_AGENT+StrLen(URIEL_USER_AGENT), ")");
// Send local_file to the host proxy over the serial link.
// The header is "*[U_PUT]<local_file>|<remote_file>*[U_SOF]", followed by the
// file bytes written one at a time to URIEL_RAW_PORT, then URIEL_BINARY_EOF.
// NOTE(review): in this view Send has no closing brace of its own and runs
// straight into U_CloseBrowser — the two appear to come from different
// revisions of the file.
// NOTE(review): d_file is dereferenced without checking the FilesFind result.
U0 Send(U8 *local_file, U8 *remote_file="") {
CDirEntry *d_file = FilesFind(local_file);
U8 *s_file = FileRead(d_file->full_name, d_file->size);
I64 f_pos=0;
CommPutS(URIEL_PORT, URIEL_FCMD_PUT);
CommPutS(URIEL_PORT, local_file);
CommPutS(URIEL_PORT, "|");
CommPutS(URIEL_PORT, remote_file);
CommPutS(URIEL_PORT, URIEL_BINARY_SOF);
progress1_max=d_file->size;
progress1=0;
StrCpy(progress1_desc, "Sending file");
for (f_pos=0;f_pos<d_file->size;f_pos++) {
// Stop sending when port 0x60 reads 0x01 (presumably Esc — TODO confirm).
if (InU8(0x60)==0x01) { UrielFileBufSize=0; break; };
Sleep(0);
progress1=f_pos;
OutU8(URIEL_RAW_PORT, s_file[f_pos]);
}
CommPutS(URIEL_PORT, URIEL_BINARY_EOF);
ProgressBarsRst;
Free(s_file);
Free(d_file);
// Ask for confirmation, then feed "x\n" to the input stream; the browser
// loops in this file exit when GetStr returns "x".
U0 U_CloseBrowser()
{
Bool close=FALSE;
close = PopUpCancelOk("Close Uriel Browser?");
if (close) { In("x\n"); };
}
// Fetch img_url into URIEL_TMP_IMG with Get() and insert it as a bitmap at the
// document anchor img_anchor. While Get() reports neither abort nor success,
// the transfer is retried with the "retry:send" pseudo-path. When img_sid /
// img_eid are non-empty, "$ID,14$" / "$ID,-14$" are printed at those anchors.
// NOTE(review): no closing brace for this function is visible in this view;
// it appears to belong to an earlier revision of the file.
U0 InsertImg(U8 *img_anchor, U8 *img_sid, U8 *img_eid, U8 *img_url) {
UrielGetFileAborted=0;
UrielGetFileSuccess=0;
DocAnchorFind(DocPut, img_anchor);
Get(img_url, URIEL_TMP_IMG);
while (UrielGetFileAborted==0 && UrielGetFileSuccess==0) {
DocAnchorFind(DocPut, img_anchor);
Get("retry:send", URIEL_TMP_IMG);
}
if (UrielGetFileAborted==0) {
DocAnchorFind(DocPut, img_anchor);
DocBMP(,URIEL_TMP_IMG);
if (StrCmp(img_sid, "")!=0) {
DocAnchorFind(DocPut, img_sid);
DocPrintPartial(DocPut, "$ID,14$");
}
if (StrCmp(img_eid, "")!=0) {
DocAnchorFind(DocPut, img_eid);
DocPrintPartial(DocPut, "$ID,-14$");
}
DocAnchorFind(DocPut, img_anchor);
}
// Clear both flags so a later caller starts from a clean state.
UrielGetFileAborted=0;
UrielGetFileSuccess=0;
// Ask the host (URIEL_THUMB command) for a thumbnail of url, save it to
// URIEL_THUMB_BMP and insert it as a bitmap at the document anchor `anchor`.
//
//   anchor  - anchor name where the bitmap is inserted
//   indent  - anchor name that receives "$ID,14$"; "" to skip
//   outdent - anchor name that receives "$ID,-14$"; "" to skip
//   url     - image URL, written to the HGBD device starting at block 1
//
// The host answers with the byte count as text in the parameter buffer;
// -1 means failure, which shows a pop-up and inserts nothing.
U0 U_InsertThumb(U8 *anchor, U8 *indent, U8 *outdent, U8 *url)
{
  DocAnchorFind(DocPut,anchor);
  I64 size;
  ZeroParamBuf;
  StrCpy(HGBD_PARAM_BUF,URIEL_USER_AGENT);
  WriteParamBuf;
  BlkWrite(HGBD,url,1,(StrLen(url)/BLK_SIZE)+1);
  HGExec(URIEL_THUMB);
  ReadParamBuf;
  size = Str2I64(HGBD_PARAM_BUF);
  if (size==-1) {
    PopUpOk("Error loading image.");
    return;
  };
  BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1);
  FileWrite(URIEL_THUMB_BMP, HGFS_BUF, size);
  DocAnchorFind(DocPut,anchor);
  DocBMP(,URIEL_THUMB_BMP);
  if (StrCmp(indent,"")!=0) {
    DocAnchorFind(DocPut,indent);
    DocPrintPartial(DocPut,"$ID,14$");
  };
  // Guard on `outdent` itself; testing `indent` here would look up an empty
  // anchor name whenever only the indent anchor was supplied.
  if (StrCmp(outdent,"")!=0) {
    DocAnchorFind(DocPut,outdent);
    DocPrintPartial(DocPut,"$ID,-14$");
  };
  DocAnchorFind(DocPut,anchor);
}
// Ask for confirmation, then feed "x\n" to the input stream so the browser
// loop (which waits for "x") exits.
// NOTE(review): no closing brace is visible for this function in this view;
// it appears to be the earlier-revision counterpart of U_CloseBrowser.
U0 CloseBrowser() {
Bool close=FALSE;
close = PopUpCancelOk("Close Browser?");
if (close) { In("x\n"); };
// Load url through the HGBD host module (URIEL_GETPAGE) and display the
// returned DolDoc page in the current task.
//
// The user agent goes into the parameter buffer and the URL is written to the
// HGBD device starting at block 1. The host answers with the page size as
// text in the parameter buffer (-1 on failure) and the page data starting at
// block 1. The function then blocks until GetStr returns "x".
// NOTE(review): BlkWrite copies whole blocks starting at `url`, so it reads
// past the end of the string — confirm callers pass a large enough buffer.
U0 U_Browser(U8 *url)
{
  I64 size;
  U8 *cmd;
  Bool done=FALSE;
  ZeroParamBuf;
  StrCpy(HGBD_PARAM_BUF,URIEL_USER_AGENT);
  WriteParamBuf;
  BlkWrite(HGBD,url,1,(StrLen(url)/BLK_SIZE)+1);
  HGExec(URIEL_GETPAGE);
  ReadParamBuf;
  size = Str2I64(HGBD_PARAM_BUF);
  if (size==-1) {
    PopUpOk("Bad URL.");
    return;
  };
  BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1);
  // NUL-terminate the page text before handing it to DocPrintPartial.
  MemSetU8(ToI64(HGFS_BUF)+size, 0x0, 1);
  WinMax;
  CDoc *page = DocNew();
  DocPrintPartial(page,HGFS_BUF);
  DocClear;
  DocInsDoc(DocPut,page);
  DocTop;
  DocDel(page);
  // Wait for the close command. Each string returned by GetStr is freed in
  // the same iteration so that inputs other than "x" are not leaked.
  while (!done) {
    cmd = GetStr;
    if (StrCmp(cmd,"x")==0) { done=TRUE; };
    Free(cmd);
  }
  DocBottom;
}
// NOTE(review): this span interleaves two functions from different revisions:
// the serial-port Browser(url) and the HGBD-based U_HistNav(index). The
// statements of both bodies are mixed below — confirm against the real file
// before changing any code here.
//
// Browser(url): sends "<url>|<os name/version>^" over the serial port, collects
// the reply into UrielPageBuf until a 0xFF byte arrives after non-0xFF data,
// prints it into a new document, shows it and waits for "x".
//
// U_HistNav(index): index 0 runs URIEL_NAVBACK and index 1 runs URIEL_NAVFWD on
// the host, then displays the page that comes back the same way U_Browser does.
U0 Browser(U8 *url)
U0 U_HistNav(I64 index)
{
WinMax;
U8 *Cmd;
UrielPageBufSize=0;
I64 UrielPageBufPos=0;
CommPutS(URIEL_PORT, url);
CommPutS(URIEL_PORT, "|");
CommPutS(URIEL_PORT, Define("DD_OS_NAME_VERSION"));
CommPutS(URIEL_PORT, "^");
CDoc *UrielPage = DocNew();
I64 b=0;
Bool load=FALSE;
Bool proc=FALSE;
Bool pad=TRUE;
progress1_max=100;
progress1=0;
StrCpy(progress1_desc, "Request sent, waiting for response");
while (!load) {
// Abandon the load when port 0x60 reads 0x01 (presumably Esc — TODO confirm).
if (InU8(0x60)==0x01) { UrielPageBufSize=0; break; };
Sleep(0);
while (FifoU8Rem(c->RX_fifo,&b)) {
// Leading 0xFF bytes are skipped as padding; the first 0xFF after real data
// ends the page.
if (b == 0xFF && !pad) { load=TRUE; break; };
if (b != 0xFF && pad) { pad=FALSE; };
if (b != 0xFF && !pad) {
if (!proc) { proc=TRUE; StrCpy(progress1_desc, "Processing DolDoc"); };
UrielPageBuf[UrielPageBufSize] = b;
UrielPageBufSize++;
progress1++;
if (progress1>progress1_max) { progress1=0; };
}
}
}
// Print the received page into the new document one character at a time.
while (UrielPageBufPos<UrielPageBufSize) {
DocPrintPartial(UrielPage, "%c", UrielPageBuf[UrielPageBufPos]);
UrielPageBufPos++;
}
I64 size;
U8 *cmd;
ZeroParamBuf;
if (index==0) { HGExec(URIEL_NAVBACK); };
if (index==1) { HGExec(URIEL_NAVFWD); };
ReadParamBuf;
// The host reports the page size as text; -1 signals failure.
size = Str2I64(HGBD_PARAM_BUF);
if (size==-1) {
PopUpOk("Bad URL.");
return;
};
BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1);
// NUL-terminate the page text before printing it.
MemSetU8(ToI64(HGFS_BUF)+size, 0x0, 1);
WinMax;
CDoc *page = DocNew();
DocPrintPartial(page,HGFS_BUF);
DocClear;
DocInsDoc(DocPut,page);
DocTop;
DocDel(page);
// NOTE(review): cmd is freed only after the loop, so strings returned by
// earlier iterations are presumably leaked.
while (TRUE) {
cmd = GetStr;
if (StrCmp(cmd,"x")==0) { break; };
}
Free(cmd);
DocBottom;
}
ProgressBarsRst;
DocClear;
DocInsDoc(DocPut, UrielPage);
DocTop;
DocDel(UrielPage);
while (TRUE) {
Cmd = GetStr;
if(StrCmp(Cmd, "x")==0) { break; };
}
Free(Cmd);
DocBottom;
// Save the host's pending download (URIEL_DOWNLOAD) under URIEL_DL_PATH.
//
// The local name is the text after the last "/" of url. "Download.OUT" is
// used instead when url has no "/", nothing follows it, or the name would not
// fit the name buffer. The host answers with the byte count as text in the
// parameter buffer (-1 on failure) and the data starting at block 1.
//
// A name ending in ".Z" is first written without that suffix and then moved to
// the ".Z" name.
U0 U_Download(U8 *url)
{
  I64 size;
  I64 len;
  U8 *name;
  // Both buffers hold the directory prefix plus up to 27 name characters and
  // the terminating NUL.
  U8 localfile[StrLen(URIEL_DL_PATH)+28];
  U8 uzlocalfile[StrLen(URIEL_DL_PATH)+28];
  StrCpy(localfile,URIEL_DL_PATH);
  name = StrLastOcc(url,"/");
  // Guard against a missing "/" (NULL result) and against names that would
  // overflow localfile.
  if (name && StrLen(name+1)>0 && StrLen(name+1)<28) {
    StrCpy(localfile+StrLen(localfile), name+1);
  } else {
    StrCpy(localfile+StrLen(localfile), "Download.OUT");
  };
  ZeroParamBuf;
  HGExec(URIEL_DOWNLOAD);
  ReadParamBuf;
  size = Str2I64(HGBD_PARAM_BUF);
  if (size==-1) {
    PopUpOk("Bad URL.");
    return;
  };
  BlkRead(HGBD,HGFS_BUF,1,(size/BLK_SIZE)+1);
  if (StrCmp(localfile+StrLen(localfile)-2,".Z")==0) {
    // Copy the name without ".Z" and terminate it directly after the copied
    // bytes, so no uninitialized byte is left in the middle of the string.
    len = StrLen(localfile)-2;
    MemCpy(uzlocalfile,localfile,len);
    uzlocalfile[len] = 0x0;
    FileWrite(uzlocalfile, HGFS_BUF, size);
    Move(uzlocalfile, localfile);
  } else {
    FileWrite(localfile, HGFS_BUF, size);
  };
  PopUpOk("Downloaded file: %s", localfile);
}
// Link handlers: move the cursor to the anchor of the clicked link, then load
// url in the browser.
// NOTE(review): Navigate (calls Browser) and U_Navigate (calls U_Browser)
// appear to be the old and new revision of the same function; Navigate has no
// closing brace of its own in this view.
U0 Navigate(U8 *anchor, U8 *url) {
DocAnchorFind(DocPut, anchor);
Browser(url);
U0 U_Navigate(U8 *anchor, U8 *url) {
DocAnchorFind(DocPut,anchor);
U_Browser(url);
}

432
uriel.py

@ -0,0 +1,432 @@
import bs4
import re
# Command codes dispatched by uriel(); each handler sends its own code back on
# `conn` when it finishes. Uriel.HC defines the same values.
URIEL_GETPAGE = 0x10
URIEL_NAVBACK = 0x11
URIEL_NAVFWD = 0x12
URIEL_THUMB = 0x13
URIEL_DOWNLOAD = 0x14
# Module-wide browser state; the handlers read and write these class
# attributes directly and never instantiate the class.
# NOTE(review): indentation is missing in this view, so the nesting of `rel`
# inside `Uriel` is inferred from the `Uriel.rel.*` accesses in the handlers.
class Uriel:
# Raw body of the last fetch that was classified as a binary download.
download_buffer = ''
# User agent sent by the guest; filled in on first use.
user_agent = ''
# List of {'url', 'filedata'} entries, and the index of the current entry.
history = []
nav_index = -1
# Scheme, host and directory of the last page, used to resolve relative URLs.
class rel:
scheme = ''
netloc = ''
path = ''
def uriel(data):
    """Run the handler whose command code equals ``data``.

    Codes that match no handler are ignored.
    """
    dispatch = (
        (URIEL_GETPAGE, UrielGetPage),
        (URIEL_NAVBACK, UrielNavBack),
        (URIEL_NAVFWD, UrielNavFwd),
        (URIEL_THUMB, UrielThumb),
        (URIEL_DOWNLOAD, UrielDownload),
    )
    for command, handler in dispatch:
        if data == command:
            handler()
def UrielGetPage():
    """Fetch the URL requested by the guest and return it as DolDoc.

    Reads the user agent from block 0 and the URL from block 1 of the HGBD
    device, resolves the URL against the previously visited location, downloads
    it with ``wget`` and runs the result through ``UrielPreProcess``.

    On success the size is written as text to block 0 and the page data to
    block 1, and the page is appended to the history (dropping any forward
    entries). On failure ``-1`` is written as the size. The command code is
    always sent back on ``conn`` so the guest is never left waiting.
    """
    global Uriel
    os.lseek(HGBD, 0, os.SEEK_SET)
    HGBD_PARAM_BUF = os.read(HGBD, BLK_SIZE)
    os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
    HGBD_URL_BUF = os.read(HGBD, BLK_SIZE*4)
    if Uriel.user_agent == '':
        Uriel.user_agent = HGBD_PARAM_BUF[:HGBD_PARAM_BUF.find('\x00')]
    url_comp = urlparse.urlparse(HGBD_URL_BUF[:HGBD_URL_BUF.find('\x00')])
    scheme = ''
    netloc = ''
    path = ''
    # Missing components fall back to those of the last page; components that
    # are present become the new base for later relative URLs.
    if url_comp.scheme == '':
        scheme = Uriel.rel.scheme
    else:
        scheme = url_comp.scheme
        Uriel.rel.scheme = url_comp.scheme
    if url_comp.netloc == '':
        netloc = Uriel.rel.netloc
    else:
        netloc = url_comp.netloc
        Uriel.rel.netloc = url_comp.netloc
    if url_comp.path != '':
        if url_comp.path.find('/') != -1:
            path = url_comp.path
            Uriel.rel.path = url_comp.path[:url_comp.path.rfind('/')+1]
        else:
            path = Uriel.rel.path + url_comp.path
    url = scheme + "://" + netloc + urllib.quote(path)
    # The URL and user agent come from the guest and from links on arbitrary
    # web pages, so pass them as separate arguments instead of through a shell
    # command line.
    with open(os.devnull, 'wb') as devnull:
        pagedata = subprocess.Popen(
            ['wget', '-q', '-O', '-', '-U', Uriel.user_agent, url],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=devnull).communicate()[0]
    # UrielPreProcess always returns at least its header, so an empty download
    # has to be detected here for the error branch to be reachable.
    filedata = UrielPreProcess(pagedata, url) if pagedata else ''
    filesize = len(filedata)
    if filesize > 0:
        if filedata.find('$AN,"",A="BINARY"$') != -1:
            # Keep the raw body so UrielDownload can hand it over later.
            Uriel.download_buffer = pagedata
        Uriel.nav_index += 1
        Uriel.history = Uriel.history[0:Uriel.nav_index]
        Uriel.history.append({'url': url, 'filedata': filedata})
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, filedata)
        logger.info("[Uriel] navigate to " + url)
    else:
        filesize = -1
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        logger.error("[Uriel] error reading url " + url)
    conn.send(chr(URIEL_GETPAGE))
def UrielNavBack():
    """Step one entry back in the history and resend the cached page.

    The index stays where it is when already at the first entry. The page size
    is written as text to block 0 and the cached data to block 1; ``-1`` is
    written as the size when there is no history entry to show. The command
    code is always sent back on ``conn``.
    """
    global Uriel
    if Uriel.nav_index > 0:
        Uriel.nav_index -= 1
    url = ''
    filedata = ''
    # With an empty history nav_index is still -1; indexing would raise and
    # the guest would never receive a reply.
    if 0 <= Uriel.nav_index < len(Uriel.history):
        entry = Uriel.history[Uriel.nav_index]
        # The stored URL is already absolute and quoted; log it unchanged.
        url = entry['url']
        filedata = entry['filedata']
    filesize = len(filedata)
    if filesize > 0:
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, filedata)
        logger.info("[Uriel] history navigate back to " + url)
    else:
        filesize = -1
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        logger.error("[Uriel] error reading history for url " + url)
    conn.send(chr(URIEL_NAVBACK))
def UrielNavFwd():
    """Step one entry forward in the history and resend the cached page.

    The index stays where it is when already at the last entry. The page size
    is written as text to block 0 and the cached data to block 1; ``-1`` is
    written as the size when there is no history entry to show. The command
    code is always sent back on ``conn``.
    """
    global Uriel
    if Uriel.nav_index < len(Uriel.history)-1:
        Uriel.nav_index += 1
    url = ''
    filedata = ''
    # With an empty history nav_index is still -1; indexing would raise and
    # the guest would never receive a reply.
    if 0 <= Uriel.nav_index < len(Uriel.history):
        entry = Uriel.history[Uriel.nav_index]
        # The stored URL is already absolute and quoted; log it unchanged.
        url = entry['url']
        filedata = entry['filedata']
    filesize = len(filedata)
    if filesize > 0:
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, filedata)
        logger.info("[Uriel] history navigate fwd to " + url)
    else:
        filesize = -1
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        logger.error("[Uriel] error reading history for url " + url)
    conn.send(chr(URIEL_NAVFWD))
def UrielThumb():
    """Fetch an image and return a 16-colour BMP thumbnail of at most 100x100.

    Reads the user agent from block 0 and the image URL from block 1 of the
    HGBD device, resolves the URL against the current page location (without
    changing it), downloads it with ``wget`` and converts it with
    ``gm convert``.

    The thumbnail size is written as text to block 0 and its bytes to block 1;
    ``-1`` is written as the size when download or conversion fails. The
    command code is always sent back on ``conn``.
    """
    global Uriel
    os.lseek(HGBD, 0, os.SEEK_SET)
    HGBD_PARAM_BUF = os.read(HGBD, BLK_SIZE)
    os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
    HGBD_URL_BUF = os.read(HGBD, BLK_SIZE*4)
    if Uriel.user_agent == '':
        Uriel.user_agent = HGBD_PARAM_BUF[:HGBD_PARAM_BUF.find('\x00')]
    url_comp = urlparse.urlparse(HGBD_URL_BUF[:HGBD_URL_BUF.find('\x00')])
    scheme = ''
    netloc = ''
    path = ''
    if url_comp.scheme == '':
        scheme = Uriel.rel.scheme
    else:
        scheme = url_comp.scheme
    if url_comp.netloc == '':
        netloc = Uriel.rel.netloc
    else:
        netloc = url_comp.netloc
    if url_comp.path != '':
        if url_comp.path.find('/') != -1:
            path = url_comp.path
        else:
            path = Uriel.rel.path + url_comp.path
    url = scheme + "://" + netloc + urllib.quote(path)
    tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp'
    while os.path.exists(tmp_thumb):
        tmp_thumb = '/tmp/' + str(uuid.uuid4()) + '.bmp'
    # Image URLs come from arbitrary web pages, so run both tools with argument
    # lists instead of a shell pipeline built by string concatenation.
    with open(os.devnull, 'wb') as devnull:
        imagedata = subprocess.Popen(
            ['wget', '-q', '-O', '-', '-U', Uriel.user_agent, url],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=devnull).communicate()[0]
    filedata = ''
    if imagedata:
        try:
            subprocess.Popen(
                ['gm', 'convert', '-resize', '100x100', '-', '-colors', '16',
                 tmp_thumb],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE).communicate(imagedata)
            with open(tmp_thumb, "rb") as thumb:
                filedata = thumb.read()
        except (IOError, OSError):
            # Conversion failed or produced no file: report an error below
            # instead of leaving the guest without a reply.
            filedata = ''
    try:
        os.remove(tmp_thumb)
    except OSError:
        # Best effort: the file may never have been created.
        pass
    filesize = len(filedata)
    if filesize > 0:
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, filedata)
        logger.info("[Uriel] get image thumbnail " + url)
    else:
        filesize = -1
        ZeroParamBuf()
        os.lseek(HGBD, 0, os.SEEK_SET)
        os.write(HGBD, str(filesize))
        logger.error("[Uriel] error reading url " + url)
    conn.send(chr(URIEL_THUMB))
def UrielDownload():
    """Hand the buffered binary download over to the guest.

    Writes the size of ``Uriel.download_buffer`` as text to block 0 and the
    bytes to block 1. When the buffer is empty, ``-1`` is written as the size
    and no data follows. The command code is sent back on ``conn`` either way.
    """
    global Uriel
    payload = Uriel.download_buffer
    have_data = len(payload) > 0
    reported_size = len(payload) if have_data else -1
    ZeroParamBuf()
    os.lseek(HGBD, 0, os.SEEK_SET)
    os.write(HGBD, str(reported_size))
    if have_data:
        os.lseek(HGBD, BLK_SIZE, os.SEEK_SET)
        os.write(HGBD, payload)
        logger.info("[Uriel] binary, download file")
    else:
        logger.error("[Uriel] error downloading file")
    conn.send(chr(URIEL_DOWNLOAD))
def UrielPreProcess(htm1, l_url):
    """Convert a fetched document into DolDoc markup for the Uriel browser.

    Args:
        htm1: Raw body of the fetched resource.
        l_url: URL the body was fetched from.

    Returns:
        A DolDoc string that starts with the navigation header. When ``htm1``
        contains no ``<html`` tag the body is treated as a binary file and
        only a download link (marked with the ``BINARY`` anchor) is returned.
        Otherwise links, images, buttons, inputs and a few formatting tags are
        translated to DolDoc and all remaining tags are stripped.
    """
    title_text = ''
    hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="U_CloseBrowser;"$ $MA+LIS,"[Back]",LM="U_HistNav(0);"$ $MA+LIS,"[Fwd]",LM="U_HistNav(1);"$ $MA+LIS,"[Go]",LM="U_Browser(GetStr(\\"\nURL> \\"));"$ ' + title_text + '\n\n'
    if htm1.upper().find('<HTML') == -1:
        dl_link = '$AN,"",A="BINARY"$Click $MA+LIS,"[Here]",LM="U_Download(\\"' + l_url + '\\");"$ to download the file:\n\n' + l_url
        return hb_header + dl_link
    htm1 = htm1[htm1.upper().find('<HTML'):]
    # '$' introduces DolDoc commands, so literal dollars must be doubled.
    htm1 = htm1.replace('$', '$$')
    htm1 = htm1.replace('<blockquote>',' ')
    htm1 = htm1.replace('<BLOCKQUOTE>',' ')
    htm1 = htm1.replace('<br>','\n')
    htm1 = htm1.replace('<br/>','\n')
    htm1 = htm1.replace('<br />','\n')
    htm1 = htm1.replace('<BR>','\n')
    htm1 = htm1.replace('<BR/>','\n')
    htm1 = htm1.replace('<BR />','\n')
    htm1 = htm1.replace('<li>',' * ')
    htm1 = htm1.replace('<LI>',' * ')
    htm1 = htm1.replace('</img>','')
    htm1 = htm1.replace('</IMG>','')
    title_text = ''
    a_pos = htm1.upper().find('<TITLE>')
    if a_pos != -1:
        title_text = htm1[a_pos:htm1.find('</', a_pos)].split('>')[1]
    soup1 = bs4.BeautifulSoup(htm1, 'lxml')
    unwrap_tags = [ 'html', 'body', 'p', 'b', 'pre', 'span', 'table', 'header' ]
    for tag in unwrap_tags:
        for match in soup1.findAll(tag):
            match.unwrap()
    # Inside links keep only text and images; drop every other nested tag.
    for f in soup1.findAll('a'):
        for tag in f.findAll(True):
            if str(tag).find('<None>') == -1:
                if tag.name.upper() != 'IMG':
                    tag.decompose()
    remove_tags = [ 'svg', 'embed', 'head', 'noscript', 'object', 'param', 'script', 'option' ]
    for tag in remove_tags:
        [s.extract() for s in soup1(tag)]
    html = str(soup1)
    html = html.replace('<h1>','$PURPLE$')
    html = html.replace('<H1>','$PURPLE$')
    html = html.replace('</h1>','$BLACK$')
    html = html.replace('</H1>','$BLACK$')
    html = html.replace('<u>','$UL,1$')
    html = html.replace('<U>','$UL,1$')
    html = html.replace('</u>','$UL,0$')
    html = html.replace('</U>','$UL,0$')
    html = html.replace('<b>','$IV,1$')
    html = html.replace('<B>','$IV,1$')
    html = html.replace('</b>','$IV,0$')
    html = html.replace('</B>','$IV,0$')
    # Replace each <img> with a placeholder carrying its source URL.
    a_pos = html.upper().find('<IMG ')
    while a_pos != -1:
        img_text = html[a_pos:].split('>')[0]
        # str.replace returns a new string; keep it so that single-quoted
        # attributes are parsed as well.
        img_text = img_text.replace("'",'"')
        img_src = ''
        img_pos = img_text.upper().find('SRC')
        if img_pos > 0:
            src_parts = img_text[img_pos:].split('"')
            if len(src_parts) > 1:
                img_src = src_parts[1]
        img_el = '[URIEL_IMG]' + img_src + '[/URIEL_IMG]'
        html = html[:a_pos] + img_el + html[1+html.upper().find('>', a_pos):]
        a_pos = html.upper().find('<IMG ')
    a_pos = html.upper().find('<BUTTON ')
    while a_pos != -1:
        button_text = html[a_pos:].split('>')[1]
        button_text = button_text[:button_text.upper().find('</BUTTON')]
        button_text = button_text.replace('"','\\"')
        button_doctext = '$BT,"' + button_text + '"$'
        html = html[:a_pos] + button_doctext + html[9+html.upper().find('</BUTTON>', a_pos):]
        a_pos = html.upper().find('<BUTTON ')
    # Turn each <a ...> into an anchor plus a macro that calls U_Navigate.
    a_ctr = 0
    a_pos = html.upper().find('<A ')
    while a_pos != -1:
        link_pre = ''
        link_text = html[a_pos:].split('>')[1]
        link_text = link_text[:link_text.upper().find('</A')]
        # Image placeholders inside the link are moved in front of it.
        while link_text.find('[URIEL_IMG]') != -1:
            link_pre += link_text[link_text.find('[URIEL_IMG]'):12+link_text.find('[/URIEL_IMG]')] + ' '
            link_text = link_text[:link_text.find('[URIEL_IMG]')] + link_text[12+link_text.find('[/URIEL_IMG]'):]
        link_text = link_text.replace('"','\\"')
        link_href = ''
        link_body = html[a_pos:html.upper().find('</A>', a_pos)]
        link_pos = link_body.upper().find('HREF')
        if link_pos > 0:
            href_parts = link_body[link_pos:].replace('\'','"').split('"')
            if len(href_parts) > 1:
                link_href = href_parts[1]
        doldoc_link = '$AN,"",A="A' + str(a_ctr) + '"$$MA+LIS,"' + link_text + '",LM="U_Navigate(\\"A' + str(a_ctr) + '\\",\\"' + link_href + '\\");"$'
        html = html[:a_pos] + link_pre + doldoc_link + html[4+html.upper().find('</A>', a_pos):]
        a_ctr += 1
        a_pos = html.upper().find('<A ')
    a_pos = html.upper().find('<CENTER>')
    while a_pos != -1:
        center_text = html[a_pos:].split('>')[1]
        center_text = center_text[:center_text.upper().find('</CENTER')]
        center_text = center_text.replace('"','\\"')
        if center_text.upper().find('[URIEL_IMG]') != -1:
            center_doctext = center_text
        else:
            center_doctext = '$TX+CX,"' + center_text + '"$'
        html = html[:a_pos] + center_doctext + html[9+html.upper().find('</CENTER>', a_pos):]
        a_pos = html.upper().find('<CENTER>')
    html = html.replace('</div>','\n')
    html = html.replace('</DIV>','\n')
    html = html.replace('</td>', ' ')
    html = html.replace('</TD>', ' ')
    html = html.replace('</tr>', '\n')
    html = html.replace('</TR>', '\n')
    a_pos = html.upper().find('<INPUT ')
    while a_pos != -1:
        input_text = html[a_pos:].split('>')[0]
        input_text = input_text.replace("'", '"')
        input_doctext = '[$UL,1$ $UL,0$]'
        t_text = ''
        # find() returns -1 (truthy) when absent, so compare explicitly.
        value_pos = input_text.upper().find('VALUE=')
        if value_pos != -1:
            t_t = input_text[value_pos:].split('"')
            if len(t_t) > 2:
                t_text = t_t[1]
        bt_text = t_text if t_text != '' else 'Button'
        st_text = t_text if t_text != '' else 'Submit'
        if input_text.find('button') != -1:
            input_doctext = '$BT,"' + bt_text + '"$'
        if input_text.find('checkbox') != -1:
            input_doctext = '$CB$'
        if input_text.find('hidden') != -1:
            input_doctext = ''
        if input_text.find('submit') != -1:
            input_doctext = '$BT,"' + st_text + '"$'
        html = html[:a_pos] + input_doctext + html[1+html.upper().find('>', a_pos):]
        a_pos = html.upper().find('<INPUT ')
    # Strip every remaining tag.
    a_pos = html.find('<')
    while a_pos != -1:
        end_pos = html.find('>', a_pos)
        if end_pos == -1:
            # Unterminated tag: remove only the stray '<'. Slicing with the
            # -1 result would re-append the whole text and never finish.
            html = html[:a_pos] + html[a_pos+1:]
        else:
            html = html[:a_pos] + html[end_pos+1:]
        a_pos = html.find('<')
    html = html.replace('&lt;','<')
    html = html.replace('&gt;','>')
    html = html.replace('&amp;','&')
    html = html.replace('&apos;','\'')
    html = html.replace('&quot;','"')
    # Expand image placeholders into anchors plus a macro calling U_InsertThumb.
    img_a_ctr = 0
    while html.find('[URIEL_IMG]') != -1:
        img_url = html[11+html.find('[URIEL_IMG]'):html.find('[/URIEL_IMG]')]
        img_ma = '$AN,"",A="IMG' + str(img_a_ctr) + '"$$MA+LIS,"[IMG]",LM="U_InsertThumb(\\"IMG' + str(img_a_ctr) + '\\",\\"$IMIS$\\",\\"$IMIE$\\",\\"' + img_url + '\\");"$$AN,"",A="IMIS' + str(img_a_ctr) + '"$'
        html = html[:html.find('[URIEL_IMG]')] + img_ma + html[12+html.find('[/URIEL_IMG]'):]
        img_a_ctr += 1
    hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="U_CloseBrowser;"$ $MA+LIS,"[Back]",LM="U_HistNav(0);"$ $MA+LIS,"[Fwd]",LM="U_HistNav(1);"$ $MA+LIS,"[Go]",LM="U_Browser(GetStr(\\"URL> \\"));"$ ' + title_text + '\n\n'
    ind_id = ''
    o_lines = []
    o_lj_ct = 0
    o_lj_indent = False
    for line in html.split('\n'):
        if not o_lj_indent:
            if line[0:13] == '$AN,"",A="IMG':
                # Left Justified image detected.
                ind_id = line.split('IMG')[1].split('"')[0]
                line = line.replace('$IMIS$','IMIS' + ind_id)
                line = line.replace('$IMIE$','IMIE' + ind_id)
                o_lj_indent = True
        if o_lj_indent:
            # Place the end-of-indent anchor on the 12th line counted from the
            # image line.
            o_lj_ct += 1
            if o_lj_ct > 11:
                line = '$AN,"",A="IMIE' + ind_id + '"$' + line
                ind_id = ''
                o_lj_ct = 0
                o_lj_indent = False
        # Images that do not start a line get no indent anchors.
        line = line.replace('$IMIS$','')
        line = line.replace('$IMIE$','')
        o_lines.append(line + '\n')
    return hb_header + ''.join(o_lines)

206
uriel_preprocessor.py

@ -1,206 +0,0 @@
import bs4
import re, sys
def preprocess(htm1, l_url):
    """Convert a fetched document into DolDoc markup for the Uriel browser.

    Args:
        htm1: Raw body of the fetched resource.
        l_url: URL the body was fetched from.

    Returns:
        A DolDoc string that starts with the navigation header. When ``htm1``
        contains no ``<html`` tag the body is treated as a binary file and
        only a download link (marked with the ``BINARY`` anchor) is returned;
        URLs ending in ``.Z`` use the ``retry:sendZ`` pseudo-path. Otherwise
        links, images, buttons, inputs and a few formatting tags are
        translated to DolDoc and all remaining tags are stripped.
    """
    title_text = ''
    hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="CloseBrowser;"$ $MA+LIS,"[Back]",LM="Browser(\\"h:back\\");"$ $MA+LIS,"[Fwd]",LM="Browser(\\"h:fwd\\");"$ $MA+LIS,"[Go]",LM="Browser(GetStr(\\"\nURL> \\"));"$ ' + title_text + '\n\n'
    if htm1.upper().find('<HTML') == -1:
        dl_link = '$AN,"",A="BINARY"$Click $MA+LIS,"[Here]",LM="Get(\\"retry:send\\",\\"~/Downloads/' + l_url.split('/')[-1] + '\\");"$ to download the file:\n\n' + l_url
        if l_url.split('.')[-1].upper() == 'Z':
            dl_link = dl_link.replace('retry:send', 'retry:sendZ')
        return hb_header + dl_link
    htm1 = htm1[htm1.upper().find('<HTML'):]
    # '$' introduces DolDoc commands, so literal dollars must be doubled.
    htm1 = htm1.replace('$', '$$')
    htm1 = htm1.replace('<blockquote>',' ')
    htm1 = htm1.replace('<BLOCKQUOTE>',' ')
    htm1 = htm1.replace('<br>','\n')
    htm1 = htm1.replace('<br/>','\n')
    htm1 = htm1.replace('<br />','\n')
    htm1 = htm1.replace('<BR>','\n')
    htm1 = htm1.replace('<BR/>','\n')
    htm1 = htm1.replace('<BR />','\n')
    htm1 = htm1.replace('<li>',' * ')
    htm1 = htm1.replace('<LI>',' * ')
    htm1 = htm1.replace('</img>','')
    htm1 = htm1.replace('</IMG>','')
    title_text = ''
    a_pos = htm1.upper().find('<TITLE>')
    if a_pos != -1:
        title_text = htm1[a_pos:htm1.find('</', a_pos)].split('>')[1]
    soup1 = bs4.BeautifulSoup(htm1, 'lxml')
    unwrap_tags = [ 'html', 'body', 'p', 'b', 'pre', 'span', 'table', 'header' ]
    for tag in unwrap_tags:
        for match in soup1.findAll(tag):
            match.unwrap()
    # Inside links keep only text and images; drop every other nested tag.
    for f in soup1.findAll('a'):
        for tag in f.findAll(True):
            if str(tag).find('<None>') == -1:
                if tag.name.upper() != 'IMG':
                    tag.decompose()
    remove_tags = [ 'svg', 'embed', 'head', 'noscript', 'object', 'param', 'script', 'option' ]
    for tag in remove_tags:
        [s.extract() for s in soup1(tag)]
    html = str(soup1)
    html = html.replace('<h1>','$PURPLE$')
    html = html.replace('<H1>','$PURPLE$')
    html = html.replace('</h1>','$BLACK$')
    html = html.replace('</H1>','$BLACK$')
    html = html.replace('<u>','$UL,1$')
    html = html.replace('<U>','$UL,1$')
    html = html.replace('</u>','$UL,0$')
    html = html.replace('</U>','$UL,0$')
    html = html.replace('<b>','$IV,1$')
    html = html.replace('<B>','$IV,1$')
    html = html.replace('</b>','$IV,0$')
    html = html.replace('</B>','$IV,0$')
    # Replace each <img> with a placeholder carrying its source URL.
    a_pos = html.upper().find('<IMG ')
    while a_pos != -1:
        img_text = html[a_pos:].split('>')[0]
        # str.replace returns a new string; keep it so that single-quoted
        # attributes are parsed as well.
        img_text = img_text.replace("'",'"')
        img_src = ''
        img_pos = img_text.upper().find('SRC')
        if img_pos > 0:
            src_parts = img_text[img_pos:].split('"')
            if len(src_parts) > 1:
                img_src = src_parts[1]
        img_el = '[URIEL_IMG]' + img_src + '[/URIEL_IMG]'
        html = html[:a_pos] + img_el + html[1+html.upper().find('>', a_pos):]
        a_pos = html.upper().find('<IMG ')
    a_pos = html.upper().find('<BUTTON ')
    while a_pos != -1:
        button_text = html[a_pos:].split('>')[1]
        button_text = button_text[:button_text.upper().find('</BUTTON')]
        button_text = button_text.replace('"','\\"')
        button_doctext = '$BT,"' + button_text + '"$'
        html = html[:a_pos] + button_doctext + html[9+html.upper().find('</BUTTON>', a_pos):]
        a_pos = html.upper().find('<BUTTON ')
    # Turn each <a ...> into an anchor plus a macro that calls Navigate.
    a_ctr = 0
    a_pos = html.upper().find('<A ')
    while a_pos != -1:
        link_pre = ''
        link_text = html[a_pos:].split('>')[1]
        link_text = link_text[:link_text.upper().find('</A')]
        # Image placeholders inside the link are moved in front of it.
        while link_text.find('[URIEL_IMG]') != -1:
            link_pre += link_text[link_text.find('[URIEL_IMG]'):12+link_text.find('[/URIEL_IMG]')] + ' '
            link_text = link_text[:link_text.find('[URIEL_IMG]')] + link_text[12+link_text.find('[/URIEL_IMG]'):]
        link_text = link_text.replace('"','\\"')
        link_href = ''
        link_body = html[a_pos:html.upper().find('</A>', a_pos)]
        link_pos = link_body.upper().find('HREF')
        if link_pos > 0:
            href_parts = link_body[link_pos:].replace('\'','"').split('"')
            if len(href_parts) > 1:
                link_href = href_parts[1]
        doldoc_link = '$AN,"",A="A' + str(a_ctr) + '"$$MA+LIS,"' + link_text + '",LM="Navigate(\\"A' + str(a_ctr) + '\\",\\"' + link_href + '\\");"$'
        html = html[:a_pos] + link_pre + doldoc_link + html[4+html.upper().find('</A>', a_pos):]
        a_ctr += 1
        a_pos = html.upper().find('<A ')
    a_pos = html.upper().find('<CENTER>')
    while a_pos != -1:
        center_text = html[a_pos:].split('>')[1]
        center_text = center_text[:center_text.upper().find('</CENTER')]
        center_text = center_text.replace('"','\\"')
        if center_text.upper().find('[URIEL_IMG]') != -1:
            center_doctext = center_text
        else:
            center_doctext = '$TX+CX,"' + center_text + '"$'
        html = html[:a_pos] + center_doctext + html[9+html.upper().find('</CENTER>', a_pos):]
        a_pos = html.upper().find('<CENTER>')
    html = html.replace('</div>','\n')
    html = html.replace('</DIV>','\n')
    html = html.replace('</td>', ' ')
    html = html.replace('</TD>', ' ')
    html = html.replace('</tr>', '\n')
    html = html.replace('</TR>', '\n')
    a_pos = html.upper().find('<INPUT ')
    while a_pos != -1:
        input_text = html[a_pos:].split('>')[0]
        input_text = input_text.replace("'", '"')
        input_doctext = '[$UL,1$ $UL,0$]'
        t_text = ''
        # find() returns -1 (truthy) when absent, so compare explicitly.
        value_pos = input_text.upper().find('VALUE=')
        if value_pos != -1:
            t_t = input_text[value_pos:].split('"')
            if len(t_t) > 2:
                t_text = t_t[1]
        bt_text = t_text if t_text != '' else 'Button'
        st_text = t_text if t_text != '' else 'Submit'
        if input_text.find('button') != -1:
            input_doctext = '$BT,"' + bt_text + '"$'
        if input_text.find('checkbox') != -1:
            input_doctext = '$CB$'
        if input_text.find('hidden') != -1:
            input_doctext = ''
        if input_text.find('submit') != -1:
            input_doctext = '$BT,"' + st_text + '"$'
        html = html[:a_pos] + input_doctext + html[1+html.upper().find('>', a_pos):]
        a_pos = html.upper().find('<INPUT ')
    # Strip every remaining tag.
    a_pos = html.find('<')
    while a_pos != -1:
        end_pos = html.find('>', a_pos)
        if end_pos == -1:
            # Unterminated tag: remove only the stray '<'. Slicing with the
            # -1 result would re-append the whole text and never finish.
            html = html[:a_pos] + html[a_pos+1:]
        else:
            html = html[:a_pos] + html[end_pos+1:]
        a_pos = html.find('<')
    html = html.replace('&lt;','<')
    html = html.replace('&gt;','>')
    html = html.replace('&amp;','&')
    html = html.replace('&apos;','\'')
    html = html.replace('&quot;','"')
    # Expand image placeholders into anchors plus a macro calling InsertImg.
    img_a_ctr = 0
    while html.find('[URIEL_IMG]') != -1:
        img_url = html[11+html.find('[URIEL_IMG]'):html.find('[/URIEL_IMG]')]
        img_ma = '$AN,"",A="IMG' + str(img_a_ctr) + '"$$MA+LIS,"[IMG]",LM="InsertImg(\\"IMG' + str(img_a_ctr) + '\\",\\"$IMIS$\\",\\"$IMIE$\\",\\"' + img_url + '.uriel_img\\");"$$AN,"",A="IMIS' + str(img_a_ctr) + '"$'
        html = html[:html.find('[URIEL_IMG]')] + img_ma + html[12+html.find('[/URIEL_IMG]'):]
        img_a_ctr += 1
    hb_header = '$WW,1$$BLACK$$MA+LIS,"[Close]",LM="CloseBrowser;"$ $MA+LIS,"[Back]",LM="Browser(\\"h:back\\");"$ $MA+LIS,"[Fwd]",LM="Browser(\\"h:fwd\\");"$ $MA+LIS,"[Go]",LM="Browser(GetStr(\\"URL> \\"));"$ ' + title_text + '\n\n'
    ind_id = ''
    o_lines = []
    o_lj_ct = 0
    o_lj_indent = False
    for line in html.split('\n'):
        if not o_lj_indent:
            if line[0:13] == '$AN,"",A="IMG':
                # Left Justified image detected.
                ind_id = line.split('IMG')[1].split('"')[0]
                line = line.replace('$IMIS$','IMIS' + ind_id)
                line = line.replace('$IMIE$','IMIE' + ind_id)
                o_lj_indent = True
        if o_lj_indent:
            # Place the end-of-indent anchor on the 12th line counted from the
            # image line.
            o_lj_ct += 1
            if o_lj_ct > 11:
                line = '$AN,"",A="IMIE' + ind_id + '"$' + line
                ind_id = ''
                o_lj_ct = 0
                o_lj_indent = False
        # Images that do not start a line get no indent anchors.
        line = line.replace('$IMIS$','')
        line = line.replace('$IMIE$','')
        o_lines.append(line + '\n')
    return hb_header + ''.join(o_lines)

176
uriel_proxy

@ -1,176 +0,0 @@
#!/usr/bin/python
from uriel_preprocessor import preprocess
import os, socket, subprocess, sys, time, urlparse, uuid
# Standalone TCP proxy script: accepts one connection at a time and serves
# three request kinds on it — file GET, file PUT and page navigation.
# NOTE(review): indentation is missing in this view, so the nesting of the
# statements below cannot be read off the text; comments describe each run of
# statements only.

# Address the proxy listens on.
HOST = '127.0.0.1'
PORT = 7202
# Protocol markers; Uriel.HC defines the same strings.
DELIM_BIN_EOF = '*[U_EOF]'
DELIM_BIN_GET = '*[U_GET]'
DELIM_BIN_PUT = '*[U_PUT]'
DELIM_BIN_SOF = '*[U_SOF]'
# Connection states.
MODE_LISTEN = 0
MODE_PUT_START = 1
MODE_GET_START = 2
URIEL_VER_STR = 'Uriel/0.1'
# Replies are sent blk_size bytes at a time with a pause of delay_ms between
# chunks (the value is passed to time.sleep, which takes seconds).
blk_size = 8
delay_ms = .001
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
s.bind((HOST, PORT))
except socket.error as msg:
sys.stdout.write('Error: failed to open socket\n')
sys.stdout.flush()
sys.exit()
s.listen(0)
sys.stdout.write('[uriel_proxy started]\n')
sys.stdout.flush()
while 1:
conn, addr = s.accept()
# Per-connection state: last transferred body, pending input, base URL for
# relative links, and the navigation history with its current index.
last_buf = ''
cmd_in = ''
rel_url = ''
history = []
hst_index = -1
state = MODE_LISTEN
while 1:
if 1==1:
data = conn.recv(1024)
cmd_in += data
# --- GET: "*[U_GET]<path or url>|<os name/version>^" ---
if state == MODE_LISTEN:
if cmd_in.find(DELIM_BIN_GET) != -1:
state = MODE_GET_START
if state == MODE_GET_START:
if cmd_in.find('^') != -1:
get_file = cmd_in[cmd_in.find(DELIM_BIN_GET)+8:cmd_in.find('|')]
blk_ctr = 0
# Protocol-relative URLs are fetched over http.
if get_file[0:2] == '//':
get_file = 'http:' + get_file
if get_file.find('://') != -1:
headers = { 'User-Agent': URIEL_VER_STR + ' (' + cmd_in.split('^')[0].rsplit('|')[1] + ')' }
# A ".uriel_img" suffix requests a 100x100, 16-colour BMP thumbnail.
if get_file.find('.uriel_img') != -1:
tmp_img_file = '/tmp/' + str(uuid.uuid4()) + '.bmp'
while os.path.exists(tmp_img_file):
tmp_img_file = '/tmp/' + str(uuid.uuid4()) + '.bmp'
# NOTE(review): the URL and user agent are concatenated into a shell command.
file = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + get_file.split('.uriel_img')[0] + '" 2>/dev/null | gm convert -resize 100x100 - -colors 16 "' + tmp_img_file + '"', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
file = open(tmp_img_file, "rb").read()
os.remove(tmp_img_file)
else:
file = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + get_file + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
else:
file = ''
# "retry:send" resends the previous body unchanged.
if get_file == 'retry:send':
file = last_buf
# "retry:sendZ" runs the previous body through `tosz` first and sends the
# file it leaves behind without the .Z suffix.
if get_file == 'retry:sendZ':
tmp_z_file = '/tmp/' + str(uuid.uuid4()) + '.Z'
while os.path.exists(tmp_z_file):
tmp_z_file = '/tmp/' + str(uuid.uuid4()) + '.Z'
open(tmp_z_file, "wb").write(last_buf)
z = subprocess.Popen('tosz "' + tmp_z_file + '"', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
file = open(tmp_z_file.split('.Z')[0], "rb").read()
os.remove(tmp_z_file.split('.Z')[0])
# Anything else is read as a path on the host.
if file == '':
file = open(get_file, "rb").read()
while blk_ctr < len(file):
conn.sendall(file[blk_ctr:blk_ctr+blk_size])
blk_ctr += blk_size
time.sleep(delay_ms)
# End-of-file marker followed by a NUL byte.
conn.sendall(DELIM_BIN_EOF+'\x00')
last_buf = file
cmd_in = ""
state = MODE_LISTEN
# --- PUT: "*[U_PUT]<local>|<remote>*[U_SOF]<bytes>*[U_EOF]" ---
if state == MODE_LISTEN:
if cmd_in.find(DELIM_BIN_PUT) != -1:
state = MODE_PUT_START
if state == MODE_PUT_START:
if cmd_in.find(DELIM_BIN_EOF) != -1:
put_files = cmd_in[cmd_in.find(DELIM_BIN_PUT)+8:cmd_in.find(DELIM_BIN_SOF)].split('|')
s_filename = put_files[0]
r_filename = put_files[1]
# Without a remote name the file goes to Xfer/<basename of the sent name>.
if r_filename == "":
r_filename = 'Xfer/' + s_filename.split('/')[len(s_filename.split('/'))-1]
open(r_filename,"wb").write(cmd_in[cmd_in.find(DELIM_BIN_SOF)+8:cmd_in.find(DELIM_BIN_EOF)])
cmd_in = ""
state = MODE_LISTEN
# --- Page navigation: "<url>|<os name/version>^" ---
if cmd_in.find('^') != -1 and state == MODE_LISTEN:
user_agent = URIEL_VER_STR + ' (' + cmd_in.split('^')[0].rsplit('|')[1] + ')'
url = cmd_in.split('^')[0].rsplit('|')[0]
# Drop the fragment.
url = url.split('#')[0]
prot_ag_url = False
if url.lower()[0:4] != 'http':
if url[0:2] == '//':
url = 'http:' + url
prot_ag_url = True
# A first path segment containing a dot is taken to be a host name.
if url.find('/') != -1 and not prot_ag_url:
if url.split('/')[0].find('.') != -1:
url = 'http://' + url
# page_int is set to 1 when the page comes from the history.
page_int = 0
if url == 'h:back':
if hst_index > 0:
hst_index -= 1
url = history[hst_index]['url']
page = history[hst_index]['page']
page_int = 1
if url == 'h:fwd':
if hst_index < len(history)-1:
hst_index += 1
url = history[hst_index]['url']
page = history[hst_index]['page']
page_int = 1
if page_int == 0:
# Relative URLs are resolved against the directory of the last page.
if url.find('://') == -1:
url = rel_url + url
# Collapse doubled slashes after the scheme separator.
url = url[:url.find('//')] + '//' + url[url.find('//')+2:].replace('//','/')
headers = { 'User-Agent': user_agent }
data = subprocess.Popen('wget -O - -U "' + headers['User-Agent'] + '" "' + url + '" 2>/dev/null', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
page = preprocess(data, url)
# Keep the raw body of binary resources for a later "retry:send".
if page.find('$AN,"",A="BINARY"$') != -1:
last_buf = data
hst_index += 1
history = history[0:hst_index]
history.append({'url':url, 'page':page})
# Keep only characters with a code below 127.
u_page = page
page = ''
u_idx = 0
while u_idx < len(u_page):
if ord(u_page[u_idx:u_idx+1]) < 127:
page += u_page[u_idx:u_idx+1]
u_idx += 1
blk_ctr = 0
while blk_ctr < len(page):
conn.sendall(page[blk_ctr:blk_ctr+blk_size])
blk_ctr += blk_size
time.sleep(delay_ms)
# A single 0xFF byte marks the end of the page.
conn.sendall('\xFF')
# Remember scheme, host and directory of this page as the new base URL.
if url.find('://') != -1:
r_url = urlparse.urlparse(url)
rel_url = r_url[0] + '://' + r_url[1]
r_path = '/'
if r_url[2] != '':
# A dot in the last path segment is taken to mean a file name.
if r_url[2][r_url[2].rfind('/'):].find('.') != -1:
r_path = r_url[2][:r_url[2].rfind('/')] + r_path
else:
r_path = r_url[2] + r_path
rel_url = rel_url + r_path
i_page = ''
page = ''
cmd_in = ''
url = ''
conn.close()
s.close()
Loading…
Cancel
Save