|
|
@ -1,66 +1,63 @@ |
|
|
|
#include <stdlib.h> |
|
|
|
#include "region_layer.h" |
|
|
|
|
|
|
|
#include <math.h> |
|
|
|
#include <stdio.h> |
|
|
|
#include "region_layer.h" |
|
|
|
#include <stdlib.h> |
|
|
|
|
|
|
|
/*
 * Bounding box used by the region layer.
 *
 * Other code in this file computes box edges as x -/+ w / 2 and
 * y -/+ h / 2, so (x, y) is the box centre and (w, h) its full extent.
 * The output stage multiplies these fields by the image width/height to
 * obtain pixel coordinates.
 */
typedef struct {
    float x; /* centre, horizontal axis */
    float y; /* centre, vertical axis */
    float w; /* full width */
    float h; /* full height */
} box_t;
|
|
|
|
|
|
|
/*
 * Sort record used by the non-maximum-suppression pass (do_nms_sort).
 *
 * One record is created per box; the array of records is handed to
 * qsort() together with nms_comparator.
 * NOTE(review): the comparator body is not visible here; it presumably
 * orders records by probs[index][class] -- confirm against its definition.
 */
typedef struct {
    int index;     /* position of the box in the boxes / probs arrays */
    int class;     /* class column currently being sorted on */
    float **probs; /* shared per-box probability table (not owned) */
} sortable_box_t;
|
|
|
|
|
|
|
|
|
|
|
int region_layer_init(region_layer_t *rl, int width, int height, int channels, int origin_width, int origin_height) |
|
|
|
{ |
|
|
|
int flag = 0; |
|
|
|
|
|
|
|
rl->coords = 4; |
|
|
|
rl->image_width = 320; |
|
|
|
rl->image_height = 240; |
|
|
|
/* Because this function takes no additional parameters,
|
|
|
|
the image width and height are treated as both the network input shape and the size of the image captured from the sensor. |
|
|
|
If the network input does not match the sensor input, the `dvp_set_image_size` function can set the sensor output shape. |
|
|
|
*/ |
|
|
|
rl->image_width = origin_width; |
|
|
|
rl->image_height = origin_height; |
|
|
|
|
|
|
|
rl->classes = channels / 5 - 5; |
|
|
|
rl->net_width = origin_width; |
|
|
|
rl->net_height = origin_height; |
|
|
|
rl->layer_width = width; |
|
|
|
rl->layer_height = height; |
|
|
|
rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); |
|
|
|
rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); |
|
|
|
rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); |
|
|
|
|
|
|
|
rl->output = malloc(rl->output_number * sizeof(float)); |
|
|
|
if (rl->output == NULL) |
|
|
|
{ |
|
|
|
if (rl->output == NULL) { |
|
|
|
flag = -1; |
|
|
|
goto malloc_error; |
|
|
|
} |
|
|
|
rl->boxes = malloc(rl->boxes_number * sizeof(box_t)); |
|
|
|
if (rl->boxes == NULL) |
|
|
|
{ |
|
|
|
if (rl->boxes == NULL) { |
|
|
|
flag = -2; |
|
|
|
goto malloc_error; |
|
|
|
} |
|
|
|
rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float)); |
|
|
|
if (rl->probs_buf == NULL) |
|
|
|
{ |
|
|
|
if (rl->probs_buf == NULL) { |
|
|
|
flag = -3; |
|
|
|
goto malloc_error; |
|
|
|
} |
|
|
|
rl->probs = malloc(rl->boxes_number * sizeof(float *)); |
|
|
|
if (rl->probs == NULL) |
|
|
|
{ |
|
|
|
if (rl->probs == NULL) { |
|
|
|
flag = -4; |
|
|
|
goto malloc_error; |
|
|
|
} |
|
|
|
for (uint32_t i = 0; i < rl->boxes_number; i++) |
|
|
|
rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); |
|
|
|
for (uint32_t i = 0; i < rl->boxes_number; i++) rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); |
|
|
|
return 0; |
|
|
|
malloc_error: |
|
|
|
free(rl->output); |
|
|
@ -78,24 +75,20 @@ void region_layer_deinit(region_layer_t *rl) |
|
|
|
free(rl->probs); |
|
|
|
} |
|
|
|
|
|
|
|
static inline float sigmoid(float x) |
|
|
|
{ |
|
|
|
return 1.f / (1.f + expf(-x)); |
|
|
|
} |
|
|
|
static inline float sigmoid(float x) { return 1.f / (1.f + expf(-x)); } |
|
|
|
|
|
|
|
/*
 * Apply sigmoid() element-wise to a contiguous run of layer values.
 *
 * Reads n floats from rl->input starting at offset `index` and writes the
 * activated values to the same offsets in rl->output.
 *
 * rl    - region layer providing the input and output buffers
 * index - offset of the first element to process
 * n     - number of consecutive elements to process
 */
static void activate_array(region_layer_t *rl, int index, int n)
{
    float *output = &rl->output[index];
    float *input = &rl->input[index];

    for (int i = 0; i < n; ++i) {
        output[i] = sigmoid(input[i]);
    }
}
|
|
|
|
|
|
|
static int entry_index(region_layer_t *rl, int location, int entry) |
|
|
|
{ |
|
|
|
int wh = rl->layer_width * rl->layer_height; |
|
|
|
int n = location / wh; |
|
|
|
int n = location / wh; |
|
|
|
int loc = location % wh; |
|
|
|
|
|
|
|
return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc; |
|
|
@ -109,10 +102,8 @@ static void softmax(region_layer_t *rl, float *input, int n, int stride, float * |
|
|
|
float sum = 0; |
|
|
|
float largest_i = input[0]; |
|
|
|
|
|
|
|
for (i = 0; i < n; ++i) |
|
|
|
{ |
|
|
|
if (input[i * stride] > largest_i) |
|
|
|
largest_i = input[i * stride]; |
|
|
|
for (i = 0; i < n; ++i) { |
|
|
|
if (input[i * stride] > largest_i) largest_i = input[i * stride]; |
|
|
|
} |
|
|
|
|
|
|
|
for (i = 0; i < n; ++i) { |
|
|
@ -121,17 +112,16 @@ static void softmax(region_layer_t *rl, float *input, int n, int stride, float * |
|
|
|
sum += e; |
|
|
|
output[i * stride] = e; |
|
|
|
} |
|
|
|
for (i = 0; i < n; ++i) |
|
|
|
output[i * stride] /= sum; |
|
|
|
for (i = 0; i < n; ++i) output[i * stride] /= sum; |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Run softmax() over every (batch, group) slice of a buffer.
 *
 * For each b in [0, batch) and g in [0, groups), softmax() is invoked on
 * the slice that starts at offset b * batch_offset + g, with n elements
 * spaced `stride` apart. The same offset is applied to both input and
 * output.
 *
 * rl           - region layer, forwarded unchanged to softmax()
 * input        - base of the source values
 * n            - number of elements per softmax
 * batch        - number of outer iterations
 * batch_offset - distance, in floats, between consecutive batches
 * groups       - number of inner iterations per batch
 * stride       - distance, in floats, between elements of one softmax
 * output       - base of the destination values
 */
static void softmax_cpu(region_layer_t *rl, float *input, int n, int batch, int batch_offset, int groups, int stride,
                        float *output)
{
    int g, b;

    for (b = 0; b < batch; ++b) {
        for (g = 0; g < groups; ++g) {
            softmax(rl, input + b * batch_offset + g, n, stride, output + b * batch_offset + g);
        }
    }
}
|
|
|
|
|
|
@ -139,11 +129,9 @@ static void forward_region_layer(region_layer_t *rl) |
|
|
|
{ |
|
|
|
int index; |
|
|
|
|
|
|
|
for (index = 0; index < rl->output_number; index++) |
|
|
|
rl->output[index] = rl->input[index]; |
|
|
|
for (index = 0; index < rl->output_number; index++) rl->output[index] = rl->input[index]; |
|
|
|
|
|
|
|
for (int n = 0; n < rl->anchor_number; ++n) |
|
|
|
{ |
|
|
|
for (int n = 0; n < rl->anchor_number; ++n) { |
|
|
|
index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0); |
|
|
|
activate_array(rl, index, 2 * rl->layer_width * rl->layer_height); |
|
|
|
index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4); |
|
|
@ -151,9 +139,8 @@ static void forward_region_layer(region_layer_t *rl) |
|
|
|
} |
|
|
|
|
|
|
|
index = entry_index(rl, 0, rl->coords + 1); |
|
|
|
softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number, |
|
|
|
rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height, |
|
|
|
rl->layer_width * rl->layer_height, rl->output + index); |
|
|
|
softmax_cpu(rl, rl->input + index, rl->classes, rl->anchor_number, rl->output_number / rl->anchor_number, |
|
|
|
rl->layer_width * rl->layer_height, rl->layer_width * rl->layer_height, rl->output + index); |
|
|
|
} |
|
|
|
|
|
|
|
static void correct_region_boxes(region_layer_t *rl, box_t *boxes) |
|
|
@ -166,8 +153,7 @@ static void correct_region_boxes(region_layer_t *rl, box_t *boxes) |
|
|
|
int new_w = 0; |
|
|
|
int new_h = 0; |
|
|
|
|
|
|
|
if (((float)net_width / image_width) < |
|
|
|
((float)net_height / image_height)) { |
|
|
|
if (((float)net_width / image_width) < ((float)net_height / image_height)) { |
|
|
|
new_w = net_width; |
|
|
|
new_h = (image_height * net_width) / image_width; |
|
|
|
} else { |
|
|
@ -177,10 +163,8 @@ static void correct_region_boxes(region_layer_t *rl, box_t *boxes) |
|
|
|
for (int i = 0; i < boxes_number; ++i) { |
|
|
|
box_t b = boxes[i]; |
|
|
|
|
|
|
|
b.x = (b.x - (net_width - new_w) / 2. / net_width) / |
|
|
|
((float)new_w / net_width); |
|
|
|
b.y = (b.y - (net_height - new_h) / 2. / net_height) / |
|
|
|
((float)new_h / net_height); |
|
|
|
b.x = (b.x - (net_width - new_w) / 2. / net_width) / ((float)new_w / net_width); |
|
|
|
b.y = (b.y - (net_height - new_h) / 2. / net_height) / ((float)new_h / net_height); |
|
|
|
b.w *= (float)net_width / new_w; |
|
|
|
b.h *= (float)net_height / new_h; |
|
|
|
boxes[i] = b; |
|
|
@ -207,34 +191,29 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro |
|
|
|
uint32_t coords = rl->coords; |
|
|
|
float threshold = rl->threshold; |
|
|
|
|
|
|
|
for (int i = 0; i < layer_width * layer_height; ++i) |
|
|
|
{ |
|
|
|
for (int i = 0; i < layer_width * layer_height; ++i) { |
|
|
|
int row = i / layer_width; |
|
|
|
int col = i % layer_width; |
|
|
|
|
|
|
|
for (int n = 0; n < anchor_number; ++n) |
|
|
|
{ |
|
|
|
for (int n = 0; n < anchor_number; ++n) { |
|
|
|
int index = n * layer_width * layer_height + i; |
|
|
|
|
|
|
|
for (int j = 0; j < classes; ++j) |
|
|
|
probs[index][j] = 0; |
|
|
|
for (int j = 0; j < classes; ++j) probs[index][j] = 0; |
|
|
|
int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords); |
|
|
|
int box_index = entry_index(rl, n * layer_width * layer_height + i, 0); |
|
|
|
float scale = predictions[obj_index]; |
|
|
|
float scale = predictions[obj_index]; |
|
|
|
|
|
|
|
boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, |
|
|
|
layer_width, layer_height, layer_width * layer_height); |
|
|
|
boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, layer_width, layer_height, |
|
|
|
layer_width * layer_height); |
|
|
|
|
|
|
|
float max = 0; |
|
|
|
|
|
|
|
for (int j = 0; j < classes; ++j) |
|
|
|
{ |
|
|
|
for (int j = 0; j < classes; ++j) { |
|
|
|
int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j); |
|
|
|
float prob = scale * predictions[class_index]; |
|
|
|
|
|
|
|
probs[index][j] = (prob > threshold) ? prob : 0; |
|
|
|
if (prob > max) |
|
|
|
max = prob; |
|
|
|
if (prob > max) max = prob; |
|
|
|
} |
|
|
|
probs[index][classes] = max; |
|
|
|
} |
|
|
@ -257,11 +236,11 @@ static int nms_comparator(void *pa, void *pb) |
|
|
|
|
|
|
|
static float overlap(float x1, float w1, float x2, float w2) |
|
|
|
{ |
|
|
|
float l1 = x1 - w1/2; |
|
|
|
float l2 = x2 - w2/2; |
|
|
|
float l1 = x1 - w1 / 2; |
|
|
|
float l2 = x2 - w2 / 2; |
|
|
|
float left = l1 > l2 ? l1 : l2; |
|
|
|
float r1 = x1 + w1/2; |
|
|
|
float r2 = x2 + w2/2; |
|
|
|
float r1 = x1 + w1 / 2; |
|
|
|
float r2 = x2 + w2 / 2; |
|
|
|
float right = r1 < r2 ? r1 : r2; |
|
|
|
|
|
|
|
return right - left; |
|
|
@ -272,8 +251,7 @@ static float box_intersection(box_t a, box_t b) |
|
|
|
float w = overlap(a.x, a.w, b.x, b.w); |
|
|
|
float h = overlap(a.y, a.h, b.y, b.h); |
|
|
|
|
|
|
|
if (w < 0 || h < 0) |
|
|
|
return 0; |
|
|
|
if (w < 0 || h < 0) return 0; |
|
|
|
return w * h; |
|
|
|
} |
|
|
|
|
|
|
@ -285,10 +263,7 @@ static float box_union(box_t a, box_t b) |
|
|
|
return u; |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Intersection-over-union of two boxes.
 *
 * Returns box_intersection(a, b) divided by box_union(a, b).
 * There is no guard for box_union() returning 0; in that case the result
 * is whatever the platform's float division yields.
 */
static float box_iou(box_t a, box_t b)
{
    return box_intersection(a, b) / box_union(a, b);
}
|
|
|
|
|
|
|
static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs) |
|
|
|
{ |
|
|
@ -298,30 +273,23 @@ static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs) |
|
|
|
int i, j, k; |
|
|
|
sortable_box_t s[boxes_number]; |
|
|
|
|
|
|
|
for (i = 0; i < boxes_number; ++i) |
|
|
|
{ |
|
|
|
for (i = 0; i < boxes_number; ++i) { |
|
|
|
s[i].index = i; |
|
|
|
s[i].class = 0; |
|
|
|
s[i].probs = probs; |
|
|
|
} |
|
|
|
|
|
|
|
for (k = 0; k < classes; ++k) |
|
|
|
{ |
|
|
|
for (i = 0; i < boxes_number; ++i) |
|
|
|
s[i].class = k; |
|
|
|
for (k = 0; k < classes; ++k) { |
|
|
|
for (i = 0; i < boxes_number; ++i) s[i].class = k; |
|
|
|
qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator); |
|
|
|
for (i = 0; i < boxes_number; ++i) |
|
|
|
{ |
|
|
|
if (probs[s[i].index][k] == 0) |
|
|
|
continue; |
|
|
|
for (i = 0; i < boxes_number; ++i) { |
|
|
|
if (probs[s[i].index][k] == 0) continue; |
|
|
|
box_t a = boxes[s[i].index]; |
|
|
|
|
|
|
|
for (j = i + 1; j < boxes_number; ++j) |
|
|
|
{ |
|
|
|
for (j = i + 1; j < boxes_number; ++j) { |
|
|
|
box_t b = boxes[s[j].index]; |
|
|
|
|
|
|
|
if (box_iou(a, b) > nms_value) |
|
|
|
probs[s[j].index][k] = 0; |
|
|
|
if (box_iou(a, b) > nms_value) probs[s[j].index][k] = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -332,11 +300,9 @@ static int max_index(float *a, int n) |
|
|
|
int i, max_i = 0; |
|
|
|
float max = a[0]; |
|
|
|
|
|
|
|
for (i = 1; i < n; ++i) |
|
|
|
{ |
|
|
|
if (a[i] > max) |
|
|
|
{ |
|
|
|
max = a[i]; |
|
|
|
for (i = 1; i < n; ++i) { |
|
|
|
if (a[i] > max) { |
|
|
|
max = a[i]; |
|
|
|
max_i = i; |
|
|
|
} |
|
|
|
} |
|
|
@ -351,14 +317,12 @@ static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info) |
|
|
|
uint32_t boxes_number = rl->boxes_number; |
|
|
|
float threshold = rl->threshold; |
|
|
|
box_t *boxes = (box_t *)rl->boxes; |
|
|
|
|
|
|
|
for (int i = 0; i < rl->boxes_number; ++i) |
|
|
|
{ |
|
|
|
int class = max_index(rl->probs[i], rl->classes); |
|
|
|
|
|
|
|
for (int i = 0; i < rl->boxes_number; ++i) { |
|
|
|
int class = max_index(rl->probs[i], rl->classes); |
|
|
|
float prob = rl->probs[i][class]; |
|
|
|
|
|
|
|
if (prob > threshold) |
|
|
|
{ |
|
|
|
if (prob > threshold) { |
|
|
|
box_t *b = boxes + i; |
|
|
|
obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2); |
|
|
|
obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2); |
|
|
@ -380,7 +344,8 @@ void region_layer_run(region_layer_t *rl, obj_info_t *obj_info) |
|
|
|
region_layer_output(rl, obj_info); |
|
|
|
} |
|
|
|
|
|
|
|
void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color) |
|
|
|
void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t color, uint16_t image_width, |
|
|
|
uint16_t image_height) |
|
|
|
{ |
|
|
|
uint32_t data = ((uint32_t)color << 16) | (uint32_t)color; |
|
|
|
uint32_t *addr1, *addr2, *addr3, *addr4, x1, y1, x2, y2; |
|
|
@ -390,48 +355,41 @@ void draw_edge(uint32_t *gram, obj_info_t *obj_info, uint32_t index, uint16_t co |
|
|
|
x2 = obj_info->obj[index].x2; |
|
|
|
y2 = obj_info->obj[index].y2; |
|
|
|
|
|
|
|
if (x1 <= 0) |
|
|
|
x1 = 1; |
|
|
|
if (x2 >= 319) |
|
|
|
x2 = 318; |
|
|
|
if (y1 <= 0) |
|
|
|
y1 = 1; |
|
|
|
if (y2 >= 239) |
|
|
|
y2 = 238; |
|
|
|
|
|
|
|
addr1 = gram + (320 * y1 + x1) / 2; |
|
|
|
addr2 = gram + (320 * y1 + x2 - 8) / 2; |
|
|
|
addr3 = gram + (320 * (y2 - 1) + x1) / 2; |
|
|
|
addr4 = gram + (320 * (y2 - 1) + x2 - 8) / 2; |
|
|
|
for (uint32_t i = 0; i < 4; i++) |
|
|
|
{ |
|
|
|
if (x1 <= 0) x1 = 1; |
|
|
|
if (x2 >= image_width - 1) x2 = image_width - 2; |
|
|
|
if (y1 <= 0) y1 = 1; |
|
|
|
if (y2 >= image_height - 1) y2 = image_height - 2; |
|
|
|
|
|
|
|
addr1 = gram + (image_width * y1 + x1) / 2; |
|
|
|
addr2 = gram + (image_width * y1 + x2 - 8) / 2; |
|
|
|
addr3 = gram + (image_width * (y2 - 1) + x1) / 2; |
|
|
|
addr4 = gram + (image_width * (y2 - 1) + x2 - 8) / 2; |
|
|
|
for (uint32_t i = 0; i < 4; i++) { |
|
|
|
*addr1 = data; |
|
|
|
*(addr1 + 160) = data; |
|
|
|
*(addr1 + image_width / 2) = data; |
|
|
|
*addr2 = data; |
|
|
|
*(addr2 + 160) = data; |
|
|
|
*(addr2 + image_width / 2) = data; |
|
|
|
*addr3 = data; |
|
|
|
*(addr3 + 160) = data; |
|
|
|
*(addr3 + image_width / 2) = data; |
|
|
|
*addr4 = data; |
|
|
|
*(addr4 + 160) = data; |
|
|
|
*(addr4 + image_width / 2) = data; |
|
|
|
addr1++; |
|
|
|
addr2++; |
|
|
|
addr3++; |
|
|
|
addr4++; |
|
|
|
} |
|
|
|
addr1 = gram + (320 * y1 + x1) / 2; |
|
|
|
addr2 = gram + (320 * y1 + x2 - 2) / 2; |
|
|
|
addr3 = gram + (320 * (y2 - 8) + x1) / 2; |
|
|
|
addr4 = gram + (320 * (y2 - 8) + x2 - 2) / 2; |
|
|
|
for (uint32_t i = 0; i < 8; i++) |
|
|
|
{ |
|
|
|
addr1 = gram + (image_width * y1 + x1) / 2; |
|
|
|
addr2 = gram + (image_width * y1 + x2 - 2) / 2; |
|
|
|
addr3 = gram + (image_width * (y2 - 8) + x1) / 2; |
|
|
|
addr4 = gram + (image_width * (y2 - 8) + x2 - 2) / 2; |
|
|
|
for (uint32_t i = 0; i < 8; i++) { |
|
|
|
*addr1 = data; |
|
|
|
*addr2 = data; |
|
|
|
*addr3 = data; |
|
|
|
*addr4 = data; |
|
|
|
addr1 += 160; |
|
|
|
addr2 += 160; |
|
|
|
addr3 += 160; |
|
|
|
addr4 += 160; |
|
|
|
addr1 += image_width / 2; |
|
|
|
addr2 += image_width / 2; |
|
|
|
addr3 += image_width / 2; |
|
|
|
addr4 += image_width / 2; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|