/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2009-2017 Brazil
Copyright(C) 2018 Kouhei Sutou <kou@clear-code.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "grn_ctx_impl.h"
#include "grn_db.h"
#include "grn_load.h"
#include "grn_obj.h"
#include "grn_util.h"
/*
 * Copies the current return code and error message of ctx into loader.
 * Callers in this file use it right before clearing the context error so
 * that the failure stays recorded on the loader.
 */
static void
grn_loader_save_error(grn_ctx *ctx, grn_loader *loader)
{
  grn_strcpy(loader->errbuf, GRN_CTX_MSGSIZE, ctx->errbuf);
  loader->rc = ctx->rc;
}
/*
 * Appends one value slot to loader->values and returns it.
 *
 * loader->values is used as an array of grn_obj. When the buffer already
 * holds a slot at the next index, that slot is reset to an empty text value
 * and reused. Otherwise the buffer is extended by one grn_obj and the new
 * slot is initialized as text.
 *
 * On success loader->values_size is incremented and loader->last points to
 * the returned slot. Returns NULL (leaving both untouched) when
 * grn_bulk_space() reports a failure.
 */
static grn_obj *
values_add(grn_ctx *ctx, grn_loader *loader)
{
  const uint32_t offset = loader->values_size * sizeof(grn_obj);
  grn_obj *slot;

  if (offset >= GRN_TEXT_LEN(&loader->values)) {
    /* No spare slot: grow the buffer first, then locate the new slot. */
    if (grn_bulk_space(ctx, &loader->values, sizeof(grn_obj))) {
      return NULL;
    }
    slot = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + offset);
    GRN_TEXT_INIT(slot, 0);
  } else {
    /* Reuse a slot that is already present in the buffer. */
    slot = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + offset);
    slot->header.domain = GRN_DB_TEXT;
    GRN_BULK_REWIND(slot);
  }

  loader->last = slot;
  loader->values_size++;
  return slot;
}
/*
 * Returns the value that follows `value` at the same nesting level.
 *
 * An opening bracket/brace marker stores the number of values it contains
 * as a UInt32, so those contained values are skipped as well.
 */
static grn_obj *
values_next(grn_ctx *ctx, grn_obj *value)
{
  grn_obj *next = value + 1;

  switch (value->header.domain) {
  case GRN_JSON_LOAD_OPEN_BRACKET :
  case GRN_JSON_LOAD_OPEN_BRACE :
    next += GRN_UINT32_VALUE(value);
    break;
  default :
    break;
  }
  return next;
}
/*
 * Counts the values in [head, tail) at the nesting level of `head`.
 * Nested arrays and objects are stepped over by values_next() and therefore
 * count as one value each.
 */
static int
values_len(grn_ctx *ctx, grn_obj *head, grn_obj *tail)
{
  int count = 0;
  grn_obj *cursor = head;

  while (cursor < tail) {
    cursor = values_next(ctx, cursor);
    count++;
  }
  return count;
}
/*
 * Looks up or adds the record identified by `key` in the loader's table.
 *
 * Returns the record ID. When the table operation yields GRN_ID_NIL the
 * context error is saved on the loader and GRN_ID_NIL is returned. When the
 * record already existed and an "ifexists" expression is set, the expression
 * is evaluated with the record bound to its first variable; a result that
 * is not true makes this function return 0.
 */
static grn_id
loader_add(grn_ctx *ctx, grn_obj *key)
{
  grn_loader *loader = &ctx->impl->loader;
  int is_new = 0;
  grn_id id;

  id = grn_table_add_by_key(ctx, loader->table, key, &is_new);
  if (id == GRN_ID_NIL) {
    grn_loader_save_error(ctx, loader);
    return id;
  }
  if (is_new || !loader->ifexists) {
    return id;
  }

  {
    grn_obj *record_var;
    grn_obj *verdict;

    record_var = grn_expr_get_var_by_offset(ctx, loader->ifexists, 0);
    GRN_RECORD_SET(ctx, record_var, id);
    verdict = grn_expr_exec(ctx, loader->ifexists, 0);
    if (grn_obj_is_true(ctx, verdict)) {
      return id;
    }
    return 0;
  }
}
/*
 * Appends the key/weight pairs of a parsed JSON object to `vector`.
 *
 * `value` is the opening-brace marker; it stores the number of values that
 * follow it, which are read as alternating key and weight values. Each
 * weight is cast to UInt32. When a cast fails, a cast error is raised for
 * `column` and the remaining pairs are not processed.
 *
 * Only complete pairs are consumed: a trailing key without a weight (odd
 * value count, e.g. from malformed input) is ignored instead of reading a
 * weight from beyond the last parsed value.
 */
static void
add_weight_vector(grn_ctx *ctx,
                  grn_obj *column,
                  grn_obj *value,
                  grn_obj *vector)
{
  unsigned int i, n;
  grn_obj weight_buffer;

  n = GRN_UINT32_VALUE(value);
  GRN_UINT32_INIT(&weight_buffer, 0);
  for (i = 0; i + 1 < n; i += 2) {
    grn_rc rc;
    grn_obj *key, *weight;

    key = value + 1 + i;
    weight = key + 1;

    GRN_BULK_REWIND(&weight_buffer);
    rc = grn_obj_cast(ctx, weight, &weight_buffer, GRN_TRUE);
    if (rc != GRN_SUCCESS) {
      grn_obj *range;
      range = grn_ctx_at(ctx, weight_buffer.header.domain);
      ERR_CAST(column, range, weight);
      grn_obj_unlink(ctx, range);
      break;
    }
    grn_vector_add_element(ctx,
                           vector,
                           GRN_BULK_HEAD(key),
                           GRN_BULK_VSIZE(key),
                           GRN_UINT32_VALUE(&weight_buffer),
                           key->header.domain);
  }
  GRN_OBJ_FIN(ctx, &weight_buffer);
}
/*
 * Stores a parsed JSON array into a vector column of record `id`.
 *
 * `vector` is the opening-bracket marker; the count it stores drives the
 * loop and the elements start right after it. Depending on the column's
 * range the elements are packed in one of three ways:
 *   - range is a table: each element is cast to a record of that table and
 *     its record ID is appended;
 *   - range has GRN_OBJ_KEY_VAR_SIZE set: text elements are appended as
 *     vector elements (cast when their domain differs from the range), and
 *     nested objects are appended as key/weight pairs;
 *   - otherwise: each element is cast to the range type when needed and its
 *     leading bytes (the size is taken from the range object) are appended.
 * Elements whose cast fails raise a cast error and are left out. The packed
 * vector is then written with grn_obj_set_value().
 */
static void
set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector)
{
  int remaining = GRN_UINT32_VALUE(vector);
  grn_obj *cursor = vector + 1;
  grn_obj packed;
  grn_id range_id = DB_OBJ(column)->range;
  grn_obj *range = grn_ctx_at(ctx, range_id);

  if (grn_obj_is_table(ctx, range)) {
    GRN_RECORD_INIT(&packed, GRN_OBJ_VECTOR, range_id);
    while (remaining--) {
      grn_obj converted;
      grn_obj *source = cursor;
      grn_bool need_cast = (range_id != cursor->header.domain);
      grn_bool usable = GRN_TRUE;

      if (need_cast) {
        GRN_RECORD_INIT(&converted, 0, range_id);
        if (grn_obj_cast(ctx, cursor, &converted, GRN_TRUE)) {
          usable = GRN_FALSE;
          ERR_CAST(column, range, cursor);
        }
        source = &converted;
      }
      if (usable) {
        GRN_UINT32_PUT(ctx, &packed, GRN_RECORD_VALUE(source));
      }
      if (need_cast) {
        GRN_OBJ_FIN(ctx, &converted);
      }
      cursor = values_next(ctx, cursor);
    }
  } else if (((struct _grn_type *)range)->obj.header.flags &
             GRN_OBJ_KEY_VAR_SIZE) {
    GRN_TEXT_INIT(&packed, GRN_OBJ_VECTOR);
    while (remaining--) {
      switch (cursor->header.domain) {
      case GRN_DB_TEXT :
        {
          grn_obj converted;
          grn_obj *source = cursor;
          grn_bool need_cast = (range_id != cursor->header.domain);
          grn_bool usable = GRN_TRUE;

          if (need_cast) {
            GRN_OBJ_INIT(&converted, GRN_BULK, 0, range_id);
            if (grn_obj_cast(ctx, cursor, &converted, GRN_TRUE)) {
              usable = GRN_FALSE;
              ERR_CAST(column, range, cursor);
            }
            source = &converted;
          }
          if (usable) {
            grn_vector_add_element(ctx, &packed,
                                   GRN_TEXT_VALUE(source),
                                   GRN_TEXT_LEN(source),
                                   0,
                                   source->header.domain);
          }
          if (need_cast) {
            GRN_OBJ_FIN(ctx, &converted);
          }
        }
        break;
      case GRN_JSON_LOAD_OPEN_BRACE :
        add_weight_vector(ctx, column, cursor, &packed);
        /* The object's members are counted in the array's value count. */
        remaining -= GRN_UINT32_VALUE(cursor);
        break;
      default :
        ERR(GRN_INVALID_ARGUMENT, "array must contain string or object");
        break;
      }
      cursor = values_next(ctx, cursor);
    }
  } else {
    grn_id element_size = ((grn_db_obj *)range)->range;

    GRN_VALUE_FIX_SIZE_INIT(&packed, GRN_OBJ_VECTOR, range_id);
    while (remaining--) {
      grn_obj converted;
      grn_obj *source = cursor;
      grn_bool need_cast = (range_id != cursor->header.domain);
      grn_bool usable = GRN_TRUE;

      if (need_cast) {
        GRN_OBJ_INIT(&converted, GRN_BULK, 0, range_id);
        if (grn_obj_cast(ctx, cursor, &converted, GRN_TRUE)) {
          usable = GRN_FALSE;
          ERR_CAST(column, range, cursor);
        }
        source = &converted;
      }
      if (usable) {
        grn_bulk_write(ctx, &packed, GRN_TEXT_VALUE(source), element_size);
      }
      if (need_cast) {
        GRN_OBJ_FIN(ctx, &converted);
      }
      cursor = values_next(ctx, cursor);
    }
  }

  grn_obj_set_value(ctx, column, id, &packed, GRN_OBJ_SET);
  GRN_OBJ_FIN(ctx, &packed);
}
/*
 * Stores a parsed JSON object as the weight vector value of `column` for
 * record `id`. `value` is the opening-brace marker of the object.
 *
 * Raises GRN_INVALID_ARGUMENT and stores nothing when `column` is not a
 * weight vector column.
 */
static void
set_weight_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *value)
{
  if (grn_obj_is_weight_vector_column(ctx, column)) {
    grn_obj pairs;

    GRN_TEXT_INIT(&pairs, GRN_OBJ_VECTOR);
    add_weight_vector(ctx, column, value, &pairs);
    grn_obj_set_value(ctx, column, id, &pairs, GRN_OBJ_SET);
    GRN_OBJ_FIN(ctx, &pairs);
    return;
  }

  {
    char name_buffer[GRN_TABLE_MAX_KEY_SIZE];
    int name_length;

    name_length = grn_obj_name(ctx, column, name_buffer,
                               GRN_TABLE_MAX_KEY_SIZE);
    ERR(GRN_INVALID_ARGUMENT,
        "<%.*s>: columns except weight vector column don't support object value",
        name_length, name_buffer);
  }
}
/*
 * Returns non-zero when the `size` bytes at `p` equal the NUL-terminated
 * pseudo column name `name`, and 0 otherwise.
 *
 * The first byte of `p` is checked against GRN_DB_PSEUDO_COLUMN_PREFIX and
 * the first byte of `name` is skipped, so `name` is presumably expected to
 * start with that prefix itself.
 */
static grn_inline int
name_equal(const char *p, unsigned int size, const char *name)
{
  if (strlen(name) != size || *p != GRN_DB_PSEUDO_COLUMN_PREFIX) {
    return 0;
  }
  return memcmp(p + 1, name + 1, size - 1) == 0;
}
/*
 * Logs (at error level) that setting a column value failed.
 *
 * The log line contains the current context error message plus inspected
 * (length-limited) forms of the record key and of the rejected value.
 *
 * key:              key of the target record, passed to
 *                   grn_inspect_limited() as is.
 * column_name:      name of the column; need not be NUL-terminated.
 * column_name_size: length of column_name in bytes.
 * column_value:     value that could not be stored.
 */
static void
report_set_column_value_failure(grn_ctx *ctx,
                                grn_obj *key,
                                const char *column_name,
                                unsigned int column_name_size,
                                grn_obj *column_value)
{
  grn_obj key_inspected, column_value_inspected;

  GRN_TEXT_INIT(&key_inspected, 0);
  GRN_TEXT_INIT(&column_value_inspected, 0);
  grn_inspect_limited(ctx, &key_inspected, key);
  grn_inspect_limited(ctx, &column_value_inspected, column_value);
  /* "%.*s" takes its precision as int, so every length is cast. */
  GRN_LOG(ctx, GRN_LOG_ERROR,
          "[table][load] failed to set column value: %s: "
          "key: <%.*s>, column: <%.*s>, value: <%.*s>",
          ctx->errbuf,
          (int)GRN_TEXT_LEN(&key_inspected),
          GRN_TEXT_VALUE(&key_inspected),
          (int)column_name_size,
          column_name,
          (int)GRN_TEXT_LEN(&column_value_inspected),
          GRN_TEXT_VALUE(&column_value_inspected));
  GRN_OBJ_FIN(ctx, &key_inspected);
  GRN_OBJ_FIN(ctx, &column_value_inspected);
}
/*
 * Converts a loaded "_id" value to a record ID.
 *
 * Values whose domain is UInt32 or Int32 are read directly; any other value
 * is cast to UInt32. When the cast fails, GRN_INVALID_ARGUMENT is raised
 * with the inspected value in the message and GRN_ID_NIL is returned.
 *
 * The value kind is decided by header.domain: GRN_DB_* IDs are compared
 * with header.domain everywhere else in this file.
 */
static grn_id
parse_id_value(grn_ctx *ctx, grn_obj *value)
{
  switch (value->header.domain) {
  case GRN_DB_UINT32 :
    return GRN_UINT32_VALUE(value);
  case GRN_DB_INT32 :
    return GRN_INT32_VALUE(value);
  default :
    {
      grn_id id = GRN_ID_NIL;
      grn_obj casted_value;

      GRN_UINT32_INIT(&casted_value, 0);
      if (grn_obj_cast(ctx, value, &casted_value, GRN_FALSE) != GRN_SUCCESS) {
        grn_obj inspected;
        GRN_TEXT_INIT(&inspected, 0);
        grn_inspect(ctx, &inspected, value);
        ERR(GRN_INVALID_ARGUMENT,
            "<%s>: failed to cast to <UInt32>: <%.*s>",
            GRN_COLUMN_NAME_ID,
            (int)GRN_TEXT_LEN(&inspected),
            GRN_TEXT_VALUE(&inspected));
        GRN_OBJ_FIN(ctx, &inspected);
      } else {
        id = GRN_UINT32_VALUE(&casted_value);
      }
      GRN_OBJ_FIN(ctx, &casted_value);
      return id;
    }
  }
}
/*
 * Runs the loader's "each" expression for record `id`, if one is set.
 * The record is bound to the expression's first variable before execution;
 * the result of the expression is not used.
 */
static void
grn_loader_apply_each(grn_ctx *ctx,
                      grn_loader *loader,
                      grn_id id)
{
  if (loader->each) {
    grn_obj *record_var = grn_expr_get_var_by_offset(ctx, loader->each, 0);

    GRN_RECORD_SET(ctx, record_var, id);
    grn_expr_exec(ctx, loader->each, 0);
  }
}
/*
 * Stores the values of an array-style record into the columns of record
 * `id`.
 *
 * `values` points at the first of `n_values` values. The values located at
 * loader->id_offset and loader->key_offset are skipped; every other value is
 * paired, in order, with the next entry of loader->columns.
 *
 * A failure to store one value does not stop the loop: the error is saved on
 * the loader, logged together with `key`, counted in
 * loader->n_column_errors, and then cleared from the context.
 */
static void
bracket_close_set_values(grn_ctx *ctx,
                         grn_loader *loader,
                         grn_id id,
                         grn_obj *key,
                         grn_obj *values,
                         uint32_t n_values)
{
  /* Columns except _id and _key. */
  grn_obj **targets = (grn_obj **)GRN_BULK_HEAD(&loader->columns);
  uint32_t target_index = 0;
  uint32_t value_index;
  grn_obj *current = values;

  for (value_index = 0;
       value_index < n_values;
       value_index++, current = values_next(ctx, current)) {
    grn_obj *column;

    if (value_index == loader->id_offset ||
        value_index == loader->key_offset) {
      /* _id and _key were already consumed to determine the record. */
      continue;
    }

    column = targets[target_index++];
    switch (current->header.domain) {
    case GRN_JSON_LOAD_OPEN_BRACKET :
      set_vector(ctx, column, id, current);
      break;
    case GRN_JSON_LOAD_OPEN_BRACE :
      set_weight_vector(ctx, column, id, current);
      break;
    default :
      grn_obj_set_value(ctx, column, id, current, GRN_OBJ_SET);
      break;
    }

    if (ctx->rc == GRN_SUCCESS) {
      continue;
    }

    {
      char name_buffer[GRN_TABLE_MAX_KEY_SIZE];
      unsigned int name_length;

      grn_loader_save_error(ctx, loader);
      name_length = grn_obj_name(ctx, column, name_buffer,
                                 GRN_TABLE_MAX_KEY_SIZE);
      report_set_column_value_failure(ctx, key,
                                      name_buffer, name_length,
                                      current);
      loader->n_column_errors++;
      ERRCLR(ctx);
    }
  }
}
static void
bracket_close(grn_ctx *ctx, grn_loader *loader)
{
grn_id id = GRN_ID_NIL;
grn_obj *value_begin, *value_end;
grn_obj *value;
grn_obj *key = NULL;
uint32_t i, begin;
uint32_t nvalues; /* Number of values in brackets. */
uint32_t depth;
grn_bool is_record_load = GRN_FALSE;
GRN_UINT32_POP(&loader->level, begin);
value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
value = value_begin;
GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET);
GRN_UINT32_SET(ctx, value, loader->values_size - begin - 1);
value++;
depth = GRN_BULK_VSIZE(&loader->level);
if (depth > sizeof(uint32_t) * loader->emit_level) {
return;
}
if (depth == 0 || !loader->table ||
loader->columns_status == GRN_LOADER_COLUMNS_BROKEN) {
goto exit;
}
nvalues = values_len(ctx, value, value_end);
if (loader->columns_status == GRN_LOADER_COLUMNS_UNSET) {
/*
* Target columns and _id or _key are not specified yet and values are
* handled as column names and "_id" or "_key".
*/
for (i = 0; i < nvalues; i++) {
const char *col_name;
unsigned int col_name_size;
grn_obj *col;
if (value->header.domain != GRN_DB_TEXT) {
grn_obj buffer;
GRN_TEXT_INIT(&buffer, 0);
grn_inspect(ctx, &buffer, value);
ERR(GRN_INVALID_ARGUMENT,
"column name must be string: <%.*s>",
(int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
grn_loader_save_error(ctx, loader);
GRN_OBJ_FIN(ctx, &buffer);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
col_name = GRN_TEXT_VALUE(value);
col_name_size = GRN_TEXT_LEN(value);
col = grn_obj_column(ctx, loader->table, col_name, col_name_size);
if (!col) {
ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>",
col_name_size, col_name);
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_ID)) {
grn_obj_unlink(ctx, col);
if (loader->id_offset != -1 || loader->key_offset != -1) {
/* _id and _key must not appear more than once. */
if (loader->id_offset != -1) {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_ID, i,
GRN_COLUMN_NAME_ID, loader->id_offset);
} else {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_ID, i,
GRN_COLUMN_NAME_KEY, loader->key_offset);
}
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
loader->id_offset = i;
} else if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_KEY)) {
grn_obj_unlink(ctx, col);
if (loader->id_offset != -1 || loader->key_offset != -1) {
/* _id and _key must not appear more than once. */
if (loader->id_offset != -1) {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_KEY, i,
GRN_COLUMN_NAME_ID, loader->id_offset);
} else {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_KEY, i,
GRN_COLUMN_NAME_KEY, loader->key_offset);
}
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
loader->key_offset = i;
} else {
GRN_PTR_PUT(ctx, &loader->columns, col);
}
value++;
}
switch (loader->table->header.type) {
case GRN_TABLE_HASH_KEY :
case GRN_TABLE_PAT_KEY :
case GRN_TABLE_DAT_KEY :
if (loader->id_offset == -1 && loader->key_offset == -1) {
ERR(GRN_INVALID_ARGUMENT, "missing id or key column");
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
break;
}
loader->columns_status = GRN_LOADER_COLUMNS_SET;
goto exit;
}
is_record_load = GRN_TRUE;
/* Target columns and _id or _key are already specified. */
if (!nvalues) {
/*
* Accept empty arrays because a dump command may output a load command
* which contains empty arrays for a table with deleted records.
*/
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
} else {
uint32_t expected_nvalues =
GRN_BULK_VSIZE(&loader->columns) / sizeof(grn_obj *);
if (loader->id_offset != -1 || loader->key_offset != -1) {
expected_nvalues++;
}
if (nvalues != expected_nvalues) {
ERR(GRN_INVALID_ARGUMENT,
"unexpected #values: expected:%u, actual:%u",
expected_nvalues, nvalues);
grn_loader_save_error(ctx, loader);
goto exit;
}
if (loader->id_offset != -1) {
grn_obj *id_bulk = value + loader->id_offset;
id = parse_id_value(ctx, id_bulk);
if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
}
} else if (loader->key_offset != -1) {
key = value + loader->key_offset;
id = loader_add(ctx, key);
} else {
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
}
}
if (id == GRN_ID_NIL) {
/* Target record is not available. */
goto exit;
}
if (loader->lock_table) {
GRN_TABLE_LOCK_BEGIN(ctx, loader->table) {
if (grn_table_at(ctx, loader->table, id) == id) {
bracket_close_set_values(ctx, loader, id, key, value, nvalues);
grn_loader_apply_each(ctx, loader, id);
}
} GRN_TABLE_LOCK_END(ctx, table);
} else {
bracket_close_set_values(ctx, loader, id, key, value, nvalues);
grn_loader_apply_each(ctx, loader, id);
}
loader->nrecords++;
exit:
if (is_record_load) {
if (ctx->rc != GRN_SUCCESS) {
loader->n_record_errors++;
}
if (loader->output_ids) {
GRN_UINT32_PUT(ctx, &(loader->ids), id);
}
if (loader->output_errors) {
GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
grn_vector_add_element(ctx,
&(loader->error_messages),
ctx->errbuf,
strlen(ctx->errbuf),
0,
GRN_DB_TEXT);
}
}
loader->values_size = begin;
ERRCLR(ctx);
}
/*
 * Stores the members of an object-style record into record `id`.
 *
 * [values_begin, values_end) holds alternating column names and values.
 * "_id" and "_key" members are skipped. A name that does not resolve to a
 * column of loader->table is only logged; automatic column creation is
 * disabled. A failure to store a value is saved on the loader, logged
 * together with `key`, counted in loader->n_column_errors, and cleared from
 * the context, after which processing continues with the next member.
 */
static void
brace_close_set_values(grn_ctx *ctx,
                       grn_loader *loader,
                       grn_id id,
                       grn_obj *key,
                       grn_obj *values_begin,
                       grn_obj *values_end)
{
  grn_obj *name_value;
  grn_obj *value;

  for (name_value = values_begin;
       name_value + 1 < values_end;
       name_value = values_next(ctx, value)) {
    const char *name = GRN_TEXT_VALUE(name_value);
    unsigned int name_size = GRN_TEXT_LEN(name_value);
    grn_obj *column;

    value = name_value + 1;

    if (name_equal(name, name_size, GRN_COLUMN_NAME_ID) ||
        name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) {
      /* _id and _key were already consumed to determine the record. */
      continue;
    }

    column = grn_obj_column(ctx, loader->table, name, name_size);
    if (!column) {
      char table_name[GRN_TABLE_MAX_KEY_SIZE];
      int table_name_size;

      table_name_size = grn_obj_name(ctx,
                                     loader->table,
                                     table_name,
                                     GRN_TABLE_MAX_KEY_SIZE);
      GRN_LOG(ctx, GRN_LOG_ERROR, "[load] nonexistent column: <%.*s.%.*s>",
              table_name_size,
              table_name,
              (int)name_size,
              name);
      /* Automatic column creation is disabled. */
      continue;
    }

    switch (value->header.domain) {
    case GRN_JSON_LOAD_OPEN_BRACKET :
      set_vector(ctx, column, id, value);
      break;
    case GRN_JSON_LOAD_OPEN_BRACE :
      set_weight_vector(ctx, column, id, value);
      break;
    default :
      grn_obj_set_value(ctx, column, id, value, GRN_OBJ_SET);
      break;
    }

    if (ctx->rc != GRN_SUCCESS) {
      grn_loader_save_error(ctx, loader);
      report_set_column_value_failure(ctx, key, name, name_size, value);
      loader->n_column_errors++;
      ERRCLR(ctx);
    }
    grn_obj_unlink(ctx, column);
  }
}
/*
 * Handles a closing '}'.
 *
 * The start index of the object is popped from loader->level and the number
 * of values the object contains is stored in its opening marker. If the
 * object is nested deeper than loader->emit_level allows, nothing else
 * happens and the values stay in place for the enclosing structure.
 *
 * Otherwise the object is loaded as one record: its members are scanned for
 * "_id"/"_key", the target record is looked up or added, and the remaining
 * members are stored as column values. Problems found while scanning
 * (non-string name, duplicated or missing key members) are only logged and
 * make the record be skipped.
 *
 * Per-record outputs (IDs, return codes, error messages) are appended when
 * enabled; finally the values of the object are dropped and the context
 * error is cleared.
 */
static void
brace_close(grn_ctx *ctx, grn_loader *loader)
{
  grn_id id = GRN_ID_NIL;
  grn_obj *value, *value_begin, *value_end;
  grn_obj *id_bulk = NULL, *key = NULL;
  uint32_t begin;

  GRN_UINT32_POP(&loader->level, begin);
  value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
  value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
  /* Check the opening marker itself; `value` is not assigned yet here. */
  GRN_ASSERT(value_begin->header.domain == GRN_JSON_LOAD_OPEN_BRACE);
  GRN_UINT32_SET(ctx, value_begin, loader->values_size - begin - 1);
  value_begin++;
  if (GRN_BULK_VSIZE(&loader->level) > sizeof(uint32_t) * loader->emit_level) {
    return;
  }
  if (!loader->table) {
    goto exit;
  }

  /* Scan values to find _id or _key. */
  for (value = value_begin; value + 1 < value_end;
       value = values_next(ctx, value)) {
    const char *name;
    unsigned int name_size;

    if (value->header.domain != GRN_DB_TEXT) {
      grn_obj buffer;
      GRN_TEXT_INIT(&buffer, 0);
      grn_inspect(ctx, &buffer, value);
      GRN_LOG(ctx, GRN_LOG_ERROR,
              "column name must be string: <%.*s>",
              (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
      GRN_OBJ_FIN(ctx, &buffer);
      goto exit;
    }
    name = GRN_TEXT_VALUE(value);
    name_size = GRN_TEXT_LEN(value);

    /* Step from the member name to the member value. */
    value++;
    if (name_equal(name, name_size, GRN_COLUMN_NAME_ID)) {
      if (id_bulk || key) {
        if (loader->table->header.type == GRN_TABLE_NO_KEY) {
          GRN_LOG(ctx, GRN_LOG_ERROR, "duplicated '_id' column");
          goto exit;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR,
                  "duplicated key columns: %s and %s",
                  id_bulk ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                  GRN_COLUMN_NAME_ID);
          goto exit;
        }
      }
      id_bulk = value;
    } else if (name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) {
      if (id_bulk || key) {
        GRN_LOG(ctx, GRN_LOG_ERROR,
                "duplicated key columns: %s and %s",
                id_bulk ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                GRN_COLUMN_NAME_KEY);
        goto exit;
      }
      key = value;
    }
  }

  switch (loader->table->header.type) {
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_DAT_KEY :
    /* The target table requires _id or _key. */
    if (!id_bulk && !key) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "neither _key nor _id is assigned");
      goto exit;
    }
    break;
  default :
    /* The target table does not have _key. */
    if (key) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "nonexistent key value");
      goto exit;
    }
    break;
  }

  if (id_bulk) {
    id = parse_id_value(ctx, id_bulk);
    if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
      /* Do not add a record when the _id value itself was invalid. */
      if (ctx->rc == GRN_SUCCESS) {
        id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
      }
    }
  } else if (key) {
    id = loader_add(ctx, key);
  } else {
    id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
  }
  if (id == GRN_ID_NIL) {
    /* Target record is not available. */
    goto exit;
  }

  if (loader->lock_table) {
    GRN_TABLE_LOCK_BEGIN(ctx, loader->table) {
      /* Set values only when the record still exists under the lock. */
      if (grn_table_at(ctx, loader->table, id) == id) {
        brace_close_set_values(ctx, loader, id, key, value_begin, value_end);
        grn_loader_apply_each(ctx, loader, id);
      }
    } GRN_TABLE_LOCK_END(ctx, loader->table);
  } else {
    brace_close_set_values(ctx, loader, id, key, value_begin, value_end);
    grn_loader_apply_each(ctx, loader, id);
  }
  loader->nrecords++;

exit:
  if (ctx->rc != GRN_SUCCESS) {
    loader->n_record_errors++;
  }
  if (loader->output_ids) {
    GRN_UINT32_PUT(ctx, &(loader->ids), id);
  }
  if (loader->output_errors) {
    GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
    grn_vector_add_element(ctx,
                           &(loader->error_messages),
                           ctx->errbuf,
                           strlen(ctx->errbuf),
                           0,
                           GRN_DB_TEXT);
  }
  /* Drop the values of this object, including its opening marker. */
  loader->values_size = begin;
  ERRCLR(ctx);
}
/*
 * Returns the numeric value (0-15) of hexadecimal digit `c`, or -1 when `c`
 * is not a hexadecimal digit.
 */
static int
json_read_hex_digit(char c)
{
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  if ('a' <= c && c <= 'f') {
    return c - 'a' + 10;
  }
  if ('A' <= c && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

/*
 * Opens an array: remembers the index of its marker value in loader->level,
 * adds the marker value and consumes the '['. When the marker cannot be
 * added, the loader is stopped instead of writing through a stale
 * loader->last.
 */
#define JSON_READ_OPEN_BRACKET() do {\
  GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\
  if (values_add(ctx, loader)) {\
    loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACKET;\
    loader->stat = GRN_LOADER_TOKEN;\
  } else {\
    loader->stat = GRN_LOADER_END;\
  }\
  str++;\
} while (0)

/* Same as JSON_READ_OPEN_BRACKET() but for an object opened by '{'. */
#define JSON_READ_OPEN_BRACE() do {\
  GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\
  if (values_add(ctx, loader)) {\
    loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACE;\
    loader->stat = GRN_LOADER_TOKEN;\
  } else {\
    loader->stat = GRN_LOADER_END;\
  }\
  str++;\
} while (0)

/*
 * Feeds `str_len` bytes of JSON text at `str` to the loader.
 *
 * The parser is a state machine whose state lives in loader->stat, so a
 * value may continue across calls. Parsed values are appended to
 * loader->values; a closing ']' or '}' hands the collected values to
 * bracket_close() or brace_close(). Once the state is GRN_LOADER_END the
 * rest of the input is ignored.
 *
 * Characters that cannot start a token are logged and skipped. Invalid
 * hexadecimal digits inside a \u escape are not reported (see TODOs).
 */
static void
json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len)
{
  const char *const beg = str;
  char c;
  int len;
  const char *se = str + str_len;

  while (str < se) {
    c = *str;
    switch (loader->stat) {
    case GRN_LOADER_BEGIN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      default :
        /* "%.*s" takes its precision as int. */
        ERR(GRN_INVALID_ARGUMENT,
            "JSON must start with '[' or '{': <%.*s>", (int)str_len, beg);
        loader->stat = GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_TOKEN :
      if ((len = grn_isspace(str, ctx->encoding))) {
        str += len;
        continue;
      }
      switch (c) {
      case '"' :
        /* Stop loading when no slot can be added for the string. */
        loader->stat =
          values_add(ctx, loader) ? GRN_LOADER_STRING : GRN_LOADER_END;
        str++;
        break;
      case '[' :
        JSON_READ_OPEN_BRACKET();
        break;
      case '{' :
        JSON_READ_OPEN_BRACE();
        break;
      case ':' :
        str++;
        break;
      case ',' :
        str++;
        break;
      case ']' :
        bracket_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '}' :
        brace_close(ctx, loader);
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        if (ctx->rc == GRN_CANCEL) {
          loader->stat = GRN_LOADER_END;
        }
        str++;
        break;
      case '+' : case '-' : case '0' : case '1' : case '2' : case '3' :
      case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
        /* The character is not consumed; GRN_LOADER_NUMBER collects it. */
        loader->stat =
          values_add(ctx, loader) ? GRN_LOADER_NUMBER : GRN_LOADER_END;
        break;
      default :
        if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) {
          /* The character is not consumed; GRN_LOADER_SYMBOL collects it. */
          loader->stat =
            values_add(ctx, loader) ? GRN_LOADER_SYMBOL : GRN_LOADER_END;
        } else {
          if ((len = grn_charlen(ctx, str, se))) {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^");
            str += len;
          } else {
            GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
            GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
            str = se;
          }
        }
        break;
      }
      break;
    case GRN_LOADER_SYMBOL :
      if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
          ('0' <= c && c <= '9') || ('_' == c)) {
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
      } else {
        /*
         * End of the symbol: recognize null/true/false. Any other symbol
         * keeps its text. The terminating character is handled in the next
         * state.
         */
        char *v = GRN_TEXT_VALUE(loader->last);
        switch (*v) {
        case 'n' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) {
            loader->last->header.domain = GRN_DB_VOID;
            GRN_BULK_REWIND(loader->last);
          }
          break;
        case 't' :
          if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_TRUE);
          }
          break;
        case 'f' :
          if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) {
            loader->last->header.domain = GRN_DB_BOOL;
            GRN_BOOL_SET(ctx, loader->last, GRN_FALSE);
          }
          break;
        default :
          break;
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
      }
      break;
    case GRN_LOADER_NUMBER :
      switch (c) {
      case '+' : case '-' : case '.' : case 'e' : case 'E' :
      case '0' : case '1' : case '2' : case '3' : case '4' :
      case '5' : case '6' : case '7' : case '8' : case '9' :
        GRN_TEXT_PUTC(ctx, loader->last, c);
        str++;
        break;
      default :
        {
          /*
           * End of the number: try Int64, then UInt64, then Float. When
           * none fits, the collected text is kept as is. The terminating
           * character is handled in the next state.
           */
          const char *num_begin = GRN_BULK_HEAD(loader->last);
          const char *num_end = GRN_BULK_CURR(loader->last);
          const char *parsed_end;
          int64_t signed_value = grn_atoll(num_begin, num_end, &parsed_end);
          if (parsed_end == num_end) {
            loader->last->header.domain = GRN_DB_INT64;
            GRN_INT64_SET(ctx, loader->last, signed_value);
          } else if (parsed_end != num_begin) {
            uint64_t unsigned_value =
              grn_atoull(num_begin, num_end, &parsed_end);
            if (parsed_end == num_end) {
              loader->last->header.domain = GRN_DB_UINT64;
              GRN_UINT64_SET(ctx, loader->last, unsigned_value);
            } else if (parsed_end != num_begin) {
              double d;
              char *end;
              grn_obj buf;
              /* strtod() needs a NUL-terminated copy of the text. */
              GRN_TEXT_INIT(&buf, 0);
              GRN_TEXT_PUT(ctx, &buf, num_begin, GRN_BULK_VSIZE(loader->last));
              GRN_TEXT_PUTC(ctx, &buf, '\0');
              errno = 0;
              d = strtod(GRN_TEXT_VALUE(&buf), &end);
              /* Accept only when the whole text was consumed. */
              if (!errno && end + 1 == GRN_BULK_CURR(&buf)) {
                loader->last->header.domain = GRN_DB_FLOAT;
                GRN_FLOAT_SET(ctx, loader->last, d);
              }
              GRN_OBJ_FIN(ctx, &buf);
            }
          }
        }
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        break;
      }
      break;
    case GRN_LOADER_STRING :
      switch (c) {
      case '\\' :
        loader->stat = GRN_LOADER_STRING_ESC;
        str++;
        break;
      case '"' :
        str++;
        loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
        break;
      default :
        if ((len = grn_charlen(ctx, str, se))) {
          GRN_TEXT_PUT(ctx, loader->last, str, len);
          str += len;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
          GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
          str = se;
        }
        break;
      }
      break;
    case GRN_LOADER_STRING_ESC :
      switch (c) {
      case 'b' :
        GRN_TEXT_PUTC(ctx, loader->last, '\b');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'f' :
        GRN_TEXT_PUTC(ctx, loader->last, '\f');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'n' :
        GRN_TEXT_PUTC(ctx, loader->last, '\n');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'r' :
        GRN_TEXT_PUTC(ctx, loader->last, '\r');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 't' :
        GRN_TEXT_PUTC(ctx, loader->last, '\t');
        loader->stat = GRN_LOADER_STRING;
        break;
      case 'u' :
        loader->stat = GRN_LOADER_UNICODE0;
        break;
      default :
        /* Any other escaped character stands for itself. */
        GRN_TEXT_PUTC(ctx, loader->last, c);
        loader->stat = GRN_LOADER_STRING;
        break;
      }
      str++;
      break;
    case GRN_LOADER_UNICODE0 :
      {
        int digit = json_read_hex_digit(c);
        if (digit >= 0) {
          loader->unichar = digit * 0x1000;
        }
        /* TODO: report an error for a non-hexadecimal character. */
      }
      loader->stat = GRN_LOADER_UNICODE1;
      str++;
      break;
    case GRN_LOADER_UNICODE1 :
      {
        int digit = json_read_hex_digit(c);
        if (digit >= 0) {
          loader->unichar += digit * 0x100;
        }
        /* TODO: report an error for a non-hexadecimal character. */
      }
      loader->stat = GRN_LOADER_UNICODE2;
      str++;
      break;
    case GRN_LOADER_UNICODE2 :
      {
        int digit = json_read_hex_digit(c);
        if (digit >= 0) {
          loader->unichar += digit * 0x10;
        }
        /* TODO: report an error for a non-hexadecimal character. */
      }
      loader->stat = GRN_LOADER_UNICODE3;
      str++;
      break;
    case GRN_LOADER_UNICODE3 :
      {
        int digit = json_read_hex_digit(c);
        if (digit >= 0) {
          loader->unichar += digit;
        }
        /* TODO: report an error for a non-hexadecimal character. */
      }
      {
        uint32_t u = loader->unichar;
        if (u >= 0xd800 && u <= 0xdbff) { /* High-surrogate code points */
          /* Keep it until the low surrogate of the pair arrives. */
          loader->unichar_hi = u;
          loader->stat = GRN_LOADER_STRING;
          str++;
          break;
        }
        if (u >= 0xdc00 && u <= 0xdfff) { /* Low-surrogate code points */
          u = 0x10000 + (loader->unichar_hi - 0xd800) * 0x400 + u - 0xdc00;
        }
        /* Append the code point encoded as UTF-8. */
        if (u < 0x80) {
          GRN_TEXT_PUTC(ctx, loader->last, u);
        } else {
          if (u < 0x800) {
            GRN_TEXT_PUTC(ctx, loader->last, (u >> 6) | 0xc0);
          } else {
            if (u < 0x10000) {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0);
            } else {
              GRN_TEXT_PUTC(ctx, loader->last, (u >> 18) | 0xf0);
              GRN_TEXT_PUTC(ctx, loader->last, ((u >> 12) & 0x3f) | 0x80);
            }
            GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80);
          }
          GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80);
        }
      }
      loader->stat = GRN_LOADER_STRING;
      str++;
      break;
    case GRN_LOADER_END :
      str = se;
      break;
    }
  }
}

#undef JSON_READ_OPEN_BRACKET
#undef JSON_READ_OPEN_BRACE
/*
 * grn_loader_parse_columns parses a columns parameter.
 * Columns except _id and _key are appended to loader->columns.
 * If it contains _id or _key, loader->id_offset or loader->key_offset is set
 * to the position of that name within the whole column list.
 *
 * The list is tokenized in batches of up to 256 names, so the position is
 * tracked across batches rather than taken from the per-batch index.
 *
 * Returns ctx->rc: GRN_INVALID_ARGUMENT is raised for a nonexistent column,
 * for more than one _id/_key, and for a keyed table without _id or _key.
 */
static grn_rc
grn_loader_parse_columns(grn_ctx *ctx, grn_loader *loader,
                         const char *str, unsigned int str_size)
{
  const char *ptr = str, *ptr_end = ptr + str_size, *rest;
  const char *tokens[256], *token_end;
  int column_offset = 0; /* Position within the whole column list. */

  while (ptr < ptr_end) {
    int i, n = grn_tokenize(ptr, ptr_end - ptr, tokens, 256, &rest);
    for (i = 0; i < n; i++, column_offset++) {
      grn_obj *column;
      token_end = tokens[i];
      /* Skip separators in front of the column name. */
      while (ptr < token_end && (' ' == *ptr || ',' == *ptr)) {
        ptr++;
      }
      column = grn_obj_column(ctx, loader->table, ptr, token_end - ptr);
      if (!column) {
        ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>",
            (int)(token_end - ptr), ptr);
        return ctx->rc;
      }
      if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_ID)) {
        grn_obj_unlink(ctx, column);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, column_offset,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, column_offset,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          return ctx->rc;
        }
        loader->id_offset = column_offset;
      } else if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_KEY)) {
        grn_obj_unlink(ctx, column);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, column_offset,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, column_offset,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          return ctx->rc;
        }
        loader->key_offset = column_offset;
      } else {
        GRN_PTR_PUT(ctx, &loader->columns, column);
      }
      ptr = token_end;
    }
    ptr = rest;
  }

  switch (loader->table->header.type) {
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_DAT_KEY :
    if (loader->id_offset == -1 && loader->key_offset == -1) {
      ERR(GRN_INVALID_ARGUMENT, "missing id or key column");
      return ctx->rc;
    }
    break;
  }
  return ctx->rc;
}
/* Address handed to grn_edges_add_communicator(); never assigned here. */
static grn_com_addr *addr;

/*
 * Runs one step of a load.
 *
 * When input->table is non-empty, the loader is cleared and configured from
 * `input`: target table, optional column list, optional "if_exists" and
 * "each" expressions, and the output/lock options. Any failure in the table
 * or column setup raises an error, moves the loader to GRN_LOADER_END and
 * returns. When input->table is empty, the call continues with the loader's
 * current settings and requires loader->table to be set already.
 *
 * Finally input->values is parsed according to loader->input_type; only
 * JSON is implemented.
 */
void
grn_load_internal(grn_ctx *ctx, grn_load_input *input)
{
  grn_loader *loader = &ctx->impl->loader;

  loader->emit_level = input->emit_level;
  if (ctx->impl->edge) {
    grn_edge *edge = grn_edges_add_communicator(ctx, addr);
    grn_obj *msg = grn_msg_open(ctx, edge->com, &ctx->impl->edge->send_old);
    /* build msg */
    grn_edge_dispatch(ctx, edge, msg);
  }

  if (input->table.length == 0) {
    /* Continuation: a previous call must have selected the table. */
    if (!loader->table) {
      ERR(GRN_INVALID_ARGUMENT, "mandatory \"table\" parameter is absent");
      loader->stat = GRN_LOADER_END;
      return;
    }
  } else {
    grn_ctx_loader_clear(ctx);
    loader->input_type = input->type;

    if (grn_db_check_name(ctx, input->table.value, input->table.length)) {
      GRN_DB_CHECK_NAME_ERR("[table][load]",
                            input->table.value,
                            (int)(input->table.length));
      loader->stat = GRN_LOADER_END;
      return;
    }

    loader->table = grn_ctx_get(ctx, input->table.value, input->table.length);
    if (!loader->table) {
      ERR(GRN_INVALID_ARGUMENT,
          "nonexistent table: <%.*s>",
          (int)(input->table.length),
          input->table.value);
      loader->stat = GRN_LOADER_END;
      return;
    }

    if (input->columns.length > 0) {
      grn_rc parse_rc;

      parse_rc = grn_loader_parse_columns(ctx,
                                          loader,
                                          input->columns.value,
                                          input->columns.length);
      if (parse_rc != GRN_SUCCESS) {
        loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
        loader->stat = GRN_LOADER_END;
        return;
      }
      loader->columns_status = GRN_LOADER_COLUMNS_SET;
    }

    if (input->if_exists.length > 0) {
      grn_obj *record_var;

      GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->ifexists,
                                record_var);
      if (loader->ifexists && record_var) {
        grn_expr_parse(ctx,
                       loader->ifexists,
                       input->if_exists.value,
                       input->if_exists.length,
                       NULL, GRN_OP_EQUAL, GRN_OP_AND,
                       GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE);
      }
    }

    if (input->each.length > 0) {
      grn_obj *record_var;

      GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->each, record_var);
      if (loader->each && record_var) {
        grn_expr_parse(ctx, loader->each,
                       input->each.value,
                       input->each.length,
                       NULL, GRN_OP_EQUAL, GRN_OP_AND,
                       GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE);
      }
    }

    loader->output_ids = input->output_ids;
    loader->output_errors = input->output_errors;
    loader->lock_table = input->lock_table;
  }

  switch (loader->input_type) {
  case GRN_CONTENT_JSON :
    json_read(ctx, loader, input->values.value, input->values.length);
    break;
  case GRN_CONTENT_NONE :
  case GRN_CONTENT_TSV :
  case GRN_CONTENT_XML :
  case GRN_CONTENT_MSGPACK :
  case GRN_CONTENT_GROONGA_COMMAND_LIST :
    /* TODO: input types other than JSON are not supported. */
    ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "unsupported input_type");
    loader->stat = GRN_LOADER_END;
    break;
  }
}
/*
 * Loads `values` into `table`.
 *
 * The arguments are packed into a grn_load_input with output_ids,
 * output_errors and lock_table disabled and emit_level set to 1, and passed
 * to grn_load_internal().
 *
 * Each string is given as a pointer plus a length in bytes. An empty
 * `table` continues a load that an earlier call set up.
 *
 * Returns GRN_INVALID_ARGUMENT when `ctx` is NULL or has no implementation
 * data (in the latter case the error is also raised on `ctx`); otherwise
 * returns ctx->rc after the load step.
 */
grn_rc
grn_load(grn_ctx *ctx, grn_content_type input_type,
         const char *table, unsigned int table_len,
         const char *columns, unsigned int columns_len,
         const char *values, unsigned int values_len,
         const char *ifexists, unsigned int ifexists_len,
         const char *each, unsigned int each_len)
{
  if (!ctx) {
    /* There is no context to record the error on. */
    return GRN_INVALID_ARGUMENT;
  }
  if (!ctx->impl) {
    ERR(GRN_INVALID_ARGUMENT, "db not initialized");
    return ctx->rc;
  }
  GRN_API_ENTER;
  {
    grn_load_input input;
    input.type = input_type;
    input.table.value = table;
    input.table.length = table_len;
    input.columns.value = columns;
    input.columns.length = columns_len;
    input.values.value = values;
    input.values.length = values_len;
    input.if_exists.value = ifexists;
    input.if_exists.length = ifexists_len;
    input.each.value = each;
    input.each.length = each_len;
    input.output_ids = GRN_FALSE;
    input.output_errors = GRN_FALSE;
    input.lock_table = GRN_FALSE;
    input.emit_level = 1;
    grn_load_internal(ctx, &input);
  }
  GRN_API_RETURN(ctx->rc);
}