/*
* File: datauri.c
*
* Copyright (C) 2006 Jorge Arellano Cid <jcid@dillo.org>
*
* Filter dpi for the "data:" URI scheme (RFC 2397).
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include "../dpip/dpip.h"
#include "dpiutil.h"
/*
* Debugging macros
*
* _MSG() takes the same arguments as MSG() but expands to nothing, so a
* message can be silenced by adding the leading underscore.
* MSG() prints through g_printerr() with a "[datauri dpi]: " prefix. The
* prefix is joined to the format by string-literal concatenation, so the
* format argument must be a string literal.
*/
#define _MSG(fmt...)
#define MSG(fmt...) g_printerr("[datauri dpi]: " fmt)
/*
* Global variables
*/
/* Socket handler used by the send_*() functions; created and released
* in main(). */
static SockHandler *sh = NULL;
/*
 * Decode a NUL-terminated base64 string in place.
 *
 * Every byte that is not part of the base64 alphabet (padding '=',
 * line breaks, blanks, ...) is skipped. The decoded bytes overwrite the
 * beginning of 'str'; this is safe because the write position never
 * overtakes the read position. A NUL is stored after the last decoded
 * byte.
 *
 * Return value: number of decoded bytes (terminating NUL not counted).
 */
int b64decode(unsigned char* str)
{
   /* Maps a byte to its 6-bit value, or -1 if outside the alphabet. */
   static const signed char sextet_of[256] = {
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 00-0F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 10-1F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* 20-2F */
      52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 30-3F */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* 40-4F */
      15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* 50-5F */
      -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* 60-6F */
      41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* 70-7F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 80-8F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 90-9F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* A0-AF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* B0-BF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* C0-CF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* D0-DF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* E0-EF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* F0-FF */
   };
   unsigned char *in, *out = str;
   unsigned int acc = 0;  /* latest sextets, newest in the low bits */
   int pending = 0;       /* bits held in 'acc' that were not emitted yet */

   for (in = str; *in != '\0'; ++in) {
      const int value = sextet_of[*in];

      if (value < 0)
         continue;

      /* At most 12 bits are ever meaningful; the mask avoids overflow. */
      acc = ((acc << 6) | (unsigned int)value) & 0xffffu;
      pending += 6;
      if (pending >= 8) {
         pending -= 8;
         *out++ = (unsigned char)((acc >> pending) & 0xffu);
      }
   }
   *out = '\0';
   return (int)(out - str);
}
/* Modified from src/url.c --------------------------------------------------*/
/*
 * Given a hex octet (e.g., e3, 2F, 20), return the corresponding
 * character value (0..255) if the octet is valid, and -1 otherwise.
 *
 * Both characters must be hexadecimal digits. The digits are checked
 * by hand instead of with strtol(), because strtol() also accepts
 * leading white space and a sign (so " 1" or "+f" would pass as octets).
 */
static int Url_decode_hex_octet(const gchar *s)
{
   int i, digit, value = 0;

   if (!s)
      return -1;

   for (i = 0; i < 2; i++) {
      /* A NUL fails every range test below, so s[1] is never read when
       * the string ends at s[0]. */
      const char c = s[i];

      if (c >= '0' && c <= '9')
         digit = c - '0';
      else if (c >= 'a' && c <= 'f')
         digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
         digit = c - 'A' + 10;
      else
         return -1;

      value = (value << 4) | digit;
   }
   return value;
}
/*
 * Parse possible hexadecimal octets ("%XX") in the URI path.
 *
 * str:  NUL-terminated input string (may be NULL).
 * p_sz: receives the number of decoded bytes, not counting the
 *       terminating NUL. The decoded data may contain NUL bytes itself
 *       (from "%00"), so this is the only reliable length.
 *
 * A '%' that is not followed by a valid octet is copied literally.
 *
 * Returns a new allocated string (release it with g_free), or NULL
 * with *p_sz == 0 when 'str' is NULL.
 */
gchar *a_Url_decode_hex_str(const gchar *str, size_t *p_sz)
{
   gchar *new_str, *dest;
   size_t decoded_len;
   int i, val;

   if (!str) {
      *p_sz = 0;
      return NULL;
   }

   /* The decoded form is never longer than the input. */
   dest = new_str = g_new(gchar, strlen(str) + 1);
   for (i = 0; str[i]; i++) {
      if (str[i] == '%' && (val = Url_decode_hex_octet(str + i + 1)) >= 0) {
         *dest++ = val;
         i += 2;        /* skip the two hex digits just consumed */
      } else {
         *dest++ = str[i];
      }
   }
   *dest = 0;

   /* Take the length BEFORE shrinking the buffer: g_realloc() may move
    * the block, after which 'dest' no longer points into it. */
   decoded_len = (size_t)(dest - new_str);
   new_str = g_realloc(new_str, sizeof(gchar) * (decoded_len + 1));
   *p_sz = decoded_len;
   return new_str;
}
/* end ----------------------------------------------------------------------*/
/*
 * Deliver the decoded payload to dillo in an HTTP-like envelope:
 * a "start_send_page" dpip tag for 'url', a "Content-type" header with
 * 'mime_type', an empty line, and then 'data_sz' raw bytes from 'data'.
 *
 * Everything is written through the global socket handler 'sh'.
 * (The last argument of the write calls is presumably a flush flag;
 * confirm against dpiutil.h.)
 */
void send_decoded_data(const char *url, const char *mime_type,
                       unsigned char *data, size_t data_sz)
{
   char *start_tag = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);

   /* 1. dpip tag announcing the page */
   sock_handler_write_str(sh, start_tag, 1);
   g_free(start_tag);

   /* 2. header, terminated by a blank line */
   sock_handler_write_str(sh, "Content-type: ", 0);
   sock_handler_write_str(sh, mime_type, 0);
   sock_handler_write_str(sh, "\n\n", 1);

   /* 3. body: written with an explicit size, as it may hold NUL bytes */
   sock_handler_write(sh, (char *)data, data_sz, 0);
}
/*
 * Send an HTML error page to dillo for a data URI that couldn't be
 * parsed. The page carries some debug information: the mime type, the
 * data size and the data itself (truncated to fit a 1024-byte buffer).
 *
 * mime_type and data may be NULL: main() takes this path precisely when
 * one of them is missing, so both are guarded before being formatted
 * with "%s".
 *
 * NOTE(review): 'mime_type' and 'data' come from the URL and are
 * inserted into the HTML page without escaping; consider escaping them.
 */
void send_failure_message(const char *url, const char *mime_type,
                          unsigned char *data, size_t data_sz)
{
   char *d_cmd;
   char buf[1024];
   const char *msg =
      "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>\n"
      "<html><body>\n"
      "<hr><h1>Datauri dpi</h1><hr>\n"
      "<p><b>Can't parse datauri:</b><br>\n";
   const char *msg_mime_type = "text/html";
   /* Passing NULL for "%s" is undefined behaviour; use a placeholder. */
   const char *shown_mime = mime_type ? mime_type : "(null)";
   const char *shown_data = data ? (const char *)data : "(null)";

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   sock_handler_write_str(sh, d_cmd, 1);
   g_free(d_cmd);

   /* Send HTTP header. */
   sock_handler_write_str(sh, "Content-type: ", 0);
   sock_handler_write_str(sh, msg_mime_type, 0);
   sock_handler_write_str(sh, "\n\n", 1);

   /* Send message */
   sock_handler_write_str(sh, msg, 0);

   /* send some debug info */
   g_snprintf(buf, sizeof(buf),
              "mime_type: %s<br>data size: %d<br>data: %s<br>",
              shown_mime, (int)data_sz, shown_data);
   sock_handler_write_str(sh, buf, 0);

   /* close page */
   sock_handler_write_str(sh, "</body></html>", 0);
}
/*
 * Get mime type from the data URI.
 *
 * The media type is the text between "data:" and the first ',', with a
 * trailing ";base64" removed. Omitted parts are completed as RFC 2397
 * describes:
 *   - nothing given             -> "text/plain;charset=US-ASCII"
 *   - only parameters given
 *     (";charset=x")            -> "text/plain;charset=x"
 *   - "charset=x" without the
 *     leading ';' (lenient)     -> "text/plain;charset=x"
 * A URI without ',' or with a media type that doesn't fit the local
 * buffer also yields the default type.
 *
 * Returns a new allocated string (release it with g_free), or NULL when
 * 'url' doesn't start with "data:".
 *
 * todo: there's no point in handling "charset" because current dillo
 * only handles ISO-LATIN-1. The FLTK2 version (utf-8) could use it in the
 * future.
 */
char *datauri_get_mime(char *url)
{
   char buf[256];
   char *mime_type = NULL, *p;
   size_t len = 0;

   if (g_strncasecmp(url, "data:", 5) != 0)
      return NULL;

   if ((p = strchr(url, ',')) && p - url < 256) {
      /* "data:" holds no ',', so the comma is at offset >= 5 here. */
      url += 5;
      len = p - url;
      memcpy(buf, url, len);
      buf[len] = 0;
      /* strip ";base64" */
      if (len >= 7 && g_strcasecmp(buf + len - 7, ";base64") == 0) {
         len -= 7;
         buf[len] = 0;
      }
   }

   /* that's it, now handle omitted types ('buf' is only read if len > 0) */
   if (len == 0) {
      mime_type = g_strdup("text/plain;charset=US-ASCII");
   } else if (buf[0] == ';') {
      /* RFC 2397 shorthand: type omitted, parameters supplied */
      mime_type = g_strconcat("text/plain", buf, NULL);
   } else if (!g_strncasecmp(buf, "charset", 7)) {
      /* same shorthand without the separator: add the missing ';' */
      mime_type = g_strconcat("text/plain;", buf, NULL);
   } else {
      mime_type = g_strdup(buf);
   }
   return mime_type;
}
/*
 * Return a decoded data string.
 *
 * The payload is everything after the first ',' in 'url'. When the
 * comma is directly preceded by ";base64" (case-insensitive) the
 * payload is base64-decoded; otherwise its "%XX" escapes are decoded.
 * A URI without ',' yields an empty string.
 *
 * p_sz receives the decoded size in bytes (the data may contain NULs).
 * Returns a new allocated buffer (release it with g_free).
 */
unsigned char *datauri_get_data(char *url, size_t *p_sz)
{
   char *comma, *payload;
   unsigned char *data;

   comma = strchr(url, ',');
   if (!comma) {
      *p_sz = 0;
      /* cast: g_strdup() returns gchar *, not unsigned char * */
      return (unsigned char *)g_strdup("");
   }

   payload = comma + 1;
   /* 12 == strlen("data:;base64"): the shortest prefix that can end in
    * ";base64", which also keeps 'comma - 7' inside the string. */
   if (comma - url >= 12 &&
       g_strncasecmp(comma - 7, ";base64", 7) == 0) {
      data = (unsigned char *)g_strdup(payload);
      *p_sz = (size_t) b64decode(data);
   } else {
      data = (unsigned char *)a_Url_decode_hex_str(payload, p_sz);
   }
   return data;
}
/*
 * Entry point: read one dpip tag from stdin, decode the data URI given
 * in its "url" attribute and write the result (or an HTML error page)
 * to stdout.
 *
 * Exits with EXIT_FAILURE when no tag can be read or when the tag lacks
 * the "cmd" or "url" attribute; returns 0 otherwise.
 */
int main(void)
{
   gchar *dpip_tag = NULL, *cmd = NULL, *url = NULL, *mime_type;
   unsigned char *data;
   size_t data_size = 0;

   /* Initialize the SockHandler */
   sh = sock_handler_new(STDIN_FILENO, STDOUT_FILENO, 8*1024);

   /* wget may need to write a temporary file...
    * NOTE(review): nothing in this file runs wget; confirm whether the
    * chdir is still needed. */
   if (chdir("/tmp") != 0)
      MSG("Warning: can't chdir to /tmp\n");

   /* Read the dpi command from STDIN */
   dpip_tag = sock_handler_read(sh);
   if (!dpip_tag) {
      /* strlen() and "%s" below need a valid string */
      MSG("Error, can't read the dpip tag\n");
      exit (EXIT_FAILURE);
   }
   MSG("[%s]\n", dpip_tag);

   cmd = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "cmd");
   url = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "url");
   if (!cmd || !url) {
      MSG("Error, cmd=%s, url=%s\n",
          cmd ? cmd : "(null)", url ? url : "(null)");
      exit (EXIT_FAILURE);
   }

   /* Parse the data URI */
   mime_type = datauri_get_mime(url);
   data = datauri_get_data(url, &data_size);

   /* mime_type is NULL for a non-"data:" URL; never hand NULL to "%s" */
   MSG("mime_type: %s\n", mime_type ? mime_type : "(null)");
   /* size_t doesn't match "%d"; print it through a known type */
   MSG("data_size: %lu\n", (unsigned long)data_size);
   MSG("data: {%s}\n", data ? (char *)data : "(null)");

   if (mime_type && data) {
      /* good URI */
      send_decoded_data(url, mime_type, data, data_size);
   } else {
      /* malformed URI */
      send_failure_message(url, mime_type, data, data_size);
   }

   g_free(data);
   g_free(mime_type);
   g_free(url);
   g_free(cmd);
   g_free(dpip_tag);

   /* Finish the SockHandler */
   sock_handler_close(sh);
   sock_handler_free(sh);

   return 0;
}