Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embed yolo files #831

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,15 @@ function gg_run_yolo {
grep -q "truck: 56%" $OUT/${ci}-main.log
grep -q "bicycle: 59%" $OUT/${ci}-main.log

python3 ../examples/yolo/gguf-addfile.py yolov3-tiny.gguf yolov3-tiny-data.gguf data/coco.names data/labels/*.png

(time ./bin/yolov3-tiny -m yolov3-tiny-data.gguf -i ${path_models}/dog.jpg ) 2>&1 | tee -a $OUT/${ci}-main2.log

grep -q "dog: 57%" $OUT/${ci}-main2.log
grep -q "car: 52%" $OUT/${ci}-main2.log
grep -q "truck: 56%" $OUT/${ci}-main2.log
grep -q "bicycle: 59%" $OUT/${ci}-main2.log

set +e
}

Expand Down
178 changes: 178 additions & 0 deletions examples/yolo/gguf-addfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#!/usr/bin/env python3
# gguf-addfile.py srcfile dstfile addfiles ...

from __future__ import annotations

import logging
import argparse
import os
import sys
from pathlib import Path
from typing import Any
#from typing import Any, Literal, NamedTuple, TypeVar, Union

import numpy as np
import numpy.typing as npt

# Necessary to load the local gguf package.  The directory whose existence is
# checked (three levels above this script, plus 'gguf-py') is also the one put
# on sys.path, so that `import gguf` resolves to the package inside it.
# Setting NO_LOCAL_GGUF in the environment disables this and uses the
# installed package instead.
_local_gguf_dir = Path(__file__).parent.parent.parent / 'gguf-py'
if "NO_LOCAL_GGUF" not in os.environ and _local_gguf_dir.exists():
    sys.path.insert(0, str(_local_gguf_dir))

from gguf import GGUFReader, GGUFWriter, ReaderField, GGMLQuantizationType, GGUFEndian, GGUFValueType, Keys # noqa: E402

logger = logging.getLogger("gguf-addfile")


def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
    """Return the host and file byte orders as ``(host_endian, file_endian)``.

    Each element is the string ``'LITTLE'`` or ``'BIG'``.  The file order is
    the opposite of the host order when ``reader.byte_order == 'S'`` and equal
    to the host order otherwise.
    """
    # sys.byteorder is used instead of the NumPy scalar `newbyteorder` trick,
    # because that method is no longer available on NumPy 2.x scalars.
    host_endian = 'LITTLE' if sys.byteorder == 'little' else 'BIG'
    if reader.byte_order == 'S':
        file_endian = 'BIG' if host_endian == 'LITTLE' else 'LITTLE'
    else:
        file_endian = host_endian
    return (host_endian, file_endian)


def get_byteorder(reader: GGUFReader) -> GGUFEndian:
    """Return the byte order of the file behind ``reader`` as a ``GGUFEndian``.

    When ``reader.byte_order == "S"`` the result is the opposite of the host
    byte order; otherwise it is the host byte order.
    """
    # sys.byteorder is used instead of the NumPy scalar `newbyteorder` trick,
    # because that method is no longer available on NumPy 2.x scalars.
    if sys.byteorder == 'little':
        host_endian = GGUFEndian.LITTLE
        swapped_endian = GGUFEndian.BIG
    else:
        host_endian = GGUFEndian.BIG
        swapped_endian = GGUFEndian.LITTLE

    if reader.byte_order == "S":
        return swapped_endian
    return host_endian


def decode_field(field: ReaderField) -> Any:
    """Convert a reader field into a plain Python value.

    Returns ``None`` when ``field`` is falsy or has no type information.
    An array of strings becomes a list of ``str``, any other array becomes a
    flat list of the values of its parts, a string field becomes a ``str``,
    and anything else yields the first element of the field's last part.
    """
    if not field or not field.types:
        return None

    kind = field.types[0]

    if kind == GGUFValueType.ARRAY:
        element_kind = field.types[-1]
        if element_kind == GGUFValueType.STRING:
            return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]

        # Non-string arrays: concatenate the contents of every referenced part.
        flattened = []
        for idx in field.data:
            flattened.extend(field.parts[idx].tolist())
        return flattened

    if kind == GGUFValueType.STRING:
        return str(bytes(field.parts[-1]), encoding='utf8')

    return field.parts[-1][0]


def get_field_data(reader: GGUFReader, key: str) -> Any:
    """Fetch the field named ``key`` from ``reader`` and return it decoded.

    Whatever ``reader.get_field(key)`` returns is handed to ``decode_field``,
    whose result is returned unchanged.
    """
    return decode_field(reader.get_field(key))


def copy_with_filename(reader: GGUFReader, writer: GGUFWriter, filename: list[str]) -> None:
    """Copy ``reader`` into ``writer`` and append the given files as extra tensors.

    Args:
        reader: Source GGUF file whose fields and tensors are copied.
        writer: Destination writer; it is closed before this function returns,
            including when an exception is raised part-way through.
        filename: Paths of the files to embed.  Each path is used both as the
            name of the added tensor and as an entry of the
            ``Keys.EMBEDDED_FILES`` array.

    Every file is read exactly once, so the length recorded in its tensor info
    always matches the bytes written afterwards.  The contents of all added
    files are held in memory until they have been written.
    """
    logger.debug(f'copy_with_filename: {filename}')  # debug
    try:
        # Used as-is when the source has no embedded-files entry yet.
        embedded = filename
        for field in reader.fields.values():
            # Suppress virtual fields and fields written by GGUFWriter
            if field.name == Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
                logger.debug(f'Suppressing {field.name}')
                continue

            # Copy existing fields except 'embedded_files'
            if field.name != Keys.EMBEDDED_FILES:
                cur_val = decode_field(field)
                writer.add_key(field.name)
                writer.add_val(cur_val, field.types[0])
                logger.debug(f'Copying {field.name}')
                continue

            # Extend the list already stored in the source file
            embedded = decode_field(field)
            for path in filename:
                logger.debug(f'Adding {field.name}: {path}')
                embedded.append(path)

        # Add filenames to kv
        logger.info(f'* Modifying {Keys.EMBEDDED_FILES} to {embedded}')
        writer.add_array(Keys.EMBEDDED_FILES, embedded)

        for tensor in reader.tensors:
            # Dimensions are written in reverse order, so flip them first
            shape = np.flipud(tensor.shape)
            writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)

        # Add file info as tensor_info: a 1-D tensor with one element per byte
        payloads: list[bytes] = []
        for path in filename:
            logger.debug(f'Adding tensor_info {path}')
            with open(path, "rb") as f:
                data = f.read()
            payloads.append(data)
            data_len = len(data)
            # NOTE(review): np.float16 does not match the I8 raw dtype; it is
            # kept from the original call -- confirm the writer ignores it when
            # raw_dtype is given.
            writer.add_tensor_info(path, [data_len], np.float16, data_len, GGMLQuantizationType.I8)

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_ti_data_to_file()

        for tensor in reader.tensors:
            writer.write_tensor_data(tensor.data)

        # Write file body as tensor data, in the order the infos were added
        for path, data in zip(filename, payloads):
            logger.debug(f'Adding tensor data {path}')
            # write data with padding
            writer.write_data(data)
    finally:
        writer.close()


def main() -> None:
    """Command-line entry point: copy a GGUF file and embed extra files in it.

    Parses the arguments, opens the input with ``GGUFReader``, asks for
    confirmation before overwriting an existing output (unless ``--force`` is
    given), creates a ``GGUFWriter`` with the input's architecture, byte order
    and alignment, and hands the work to ``copy_with_filename``.
    """
    cli = argparse.ArgumentParser(description="Add files to GGUF file metadata")
    cli.add_argument("input", type=str, help="GGUF format model input filename")
    cli.add_argument("output", type=str, help="GGUF format model output filename")
    cli.add_argument("addfiles", type=str, nargs='+', help="add filenames ...")
    cli.add_argument("--force", action="store_true", help="Bypass warnings without confirmation")
    cli.add_argument("--verbose", action="store_true", help="Increase output verbosity")

    # Invoked without any argument: print the help text.
    argv = ["--help"] if len(sys.argv) <= 1 else None
    args = cli.parse_args(argv)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    logger.info(f'* Loading: {args.input}')
    reader = GGUFReader(args.input, 'r')
    arch = get_field_data(reader, Keys.General.ARCHITECTURE)
    endianess = get_byteorder(reader)

    needs_confirmation = os.path.isfile(args.output) and not args.force
    if needs_confirmation:
        logger.warning('*** Warning *** Warning *** Warning **')
        logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!')
        logger.warning('* Enter exactly YES if you are positive you want to proceed:')
        if input('YES, I am sure> ') != 'YES':
            logger.info("You didn't enter YES. Okay then, see ya!")
            sys.exit(0)

    logger.info(f'* Writing: {args.output}')
    writer = GGUFWriter(args.output, arch=arch, endianess=endianess)

    # Carry over a custom alignment if the input file declares one.
    alignment = get_field_data(reader, Keys.General.ALIGNMENT)
    if alignment is not None:
        logger.debug(f'Setting custom alignment: {alignment}')
        writer.data_alignment = alignment

    if args.addfiles is None:
        return

    filename = list(args.addfiles)
    for path in filename:
        logger.info(f'* Adding: {path}')
    copy_with_filename(reader, writer, filename)


# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()
27 changes: 26 additions & 1 deletion examples/yolo/yolo-image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ bool load_image(const char *fname, yolo_image & img)
return true;
}

// Decode an encoded image held in memory into `img`.
//
// buffer, len: the bytes passed to stbi_load_from_memory.
// img:         receives the width, height, a channel count of 3 and the pixel
//              values divided by 255, stored one channel plane after another.
// Returns false when stbi_load_from_memory yields no data, true otherwise.
bool load_image_from_memory(const char *buffer, int len, yolo_image & img)
{
    const int channels = 3; // always request three channels from stb_image
    int width = 0;
    int height = 0;
    int file_channels = 0;  // filled in by stb_image; not used afterwards
    uint8_t * pixels = stbi_load_from_memory((uint8_t *)buffer, len, &width, &height, &file_channels, channels);
    if (pixels == nullptr) {
        return false;
    }

    img.w = width;
    img.h = height;
    img.c = channels;
    img.data.resize(width*height*channels);

    // The decoded bytes are interleaved per pixel; the destination is planar.
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            for (int ch = 0; ch < channels; ++ch) {
                const int src = ch + channels*x + channels*width*y;
                const int dst = x + width*y + width*height*ch;
                img.data[dst] = (float)pixels[src]/255.;
            }
        }
    }

    stbi_image_free(pixels);
    return true;
}

static yolo_image resize_image(const yolo_image & im, int w, int h)
{
yolo_image resized(w, h, im.c);
Expand Down Expand Up @@ -207,4 +232,4 @@ void draw_label(yolo_image & im, int row, int col, const yolo_image & label, con
}
}
}
}
}
1 change: 1 addition & 0 deletions examples/yolo/yolo-image.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct yolo_image {
};

bool load_image(const char *fname, yolo_image & img);
bool load_image_from_memory(const char *buffer, int len, yolo_image & img);
void draw_box_width(yolo_image & a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
yolo_image letterbox_image(const yolo_image & im, int w, int h);
bool save_image(const yolo_image & im, const char *name, int quality);
Expand Down
107 changes: 101 additions & 6 deletions examples/yolo/yolov3-tiny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct yolo_model {
int height = 416;
std::vector<conv2d_layer> conv2d_layers;
struct ggml_context * ctx;
struct gguf_context * ctx_gguf;
};

struct yolo_layer {
Expand Down Expand Up @@ -71,6 +72,7 @@ static bool load_model(const std::string & fname, yolo_model & model) {
fprintf(stderr, "%s: gguf_init_from_file() failed\n", __func__);
return false;
}
model.ctx_gguf = ctx;
model.width = 416;
model.height = 416;
model.conv2d_layers.resize(13);
Expand Down Expand Up @@ -100,6 +102,47 @@ static bool load_model(const std::string & fname, yolo_model & model) {
return true;
}

// istream from memory
#include <streambuf>
#include <istream>

// Read-only stream buffer over an existing block of memory, so that a
// std::istream can be layered on top of it without copying.
// The buffer does not own the memory; the caller must keep it alive.
struct membuf : std::streambuf {
    // View over the half-open range [begin, end).
    membuf(const char * begin, const char * end) {
        // setg() takes non-const pointers; the get area is only ever read.
        char * b(const_cast<char *>(begin));
        char * e(const_cast<char *>(end));
        this->begin = b;
        this->end = e;
        this->setg(b, b, e);
    }

    // View over `size` bytes starting at `base`.
    membuf(const char * base, size_t size) : membuf(base, base + size) {}

    // Move the read position relative to the start, the current position or
    // the end.  Returns the new absolute position, or pos_type(off_type(-1))
    // if the target would lie outside [begin, end]; the position is left
    // unchanged in that case.
    virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which = std::ios_base::in) override {
        (void) which;

        const off_type size = end - begin;
        off_type origin = 0;
        if (dir == std::ios_base::cur) {
            origin = gptr() - begin;
        } else if (dir == std::ios_base::end) {
            origin = size;
        } else if (dir != std::ios_base::beg) {
            return pos_type(off_type(-1));
        }

        // Validate as an integer offset before forming any pointer.
        const off_type target = origin + off;
        if (target < 0 || target > size) {
            return pos_type(off_type(-1));
        }

        setg(begin, begin + target, end);
        return pos_type(target);
    }

    // Absolute seek, expressed as an offset from the beginning.
    virtual pos_type seekpos(std::streampos pos, std::ios_base::openmode mode) override {
        return seekoff(pos - pos_type(off_type(0)), std::ios_base::beg, mode);
    }

    char * begin; // first byte of the viewed memory
    char * end;   // one past the last byte of the viewed memory
};


static bool load_labels(const char * filename, std::vector<std::string> & labels)
{
std::ifstream file_in(filename);
Expand All @@ -114,6 +157,28 @@ static bool load_labels(const char * filename, std::vector<std::string> & labels
return true;
}

static bool load_labels_gguf(const struct gguf_context * ctx, const char * filename, std::vector<std::string> & labels)
{
int tensor = gguf_find_tensor(ctx, filename);
if (tensor == -1) {
return false;
}
const size_t offset = gguf_get_tensor_offset(ctx, tensor);
const size_t len = gguf_get_tensor_size(ctx, tensor);
Copy link
Owner

@ggerganov ggerganov Jun 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somehow I didn't notice this before: gguf_get_tensor_size() is not needed either. You can instead use:

Suggested change
const size_t len = gguf_get_tensor_size(ctx, tensor);
const size_t len = ggml_nelements(tensor);

So remove gguf_get_tensor_size altogether

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay, i removed gguf_get_tensor_size from ggml.h, ggml.c, yolov3-tiny.cpp.

const char * data = (char *)gguf_get_data(ctx);
membuf buf(data + offset, data + offset + len);
std::istream file_in(&buf);
if (!file_in) {
return false;
}
std::string line;
while (std::getline(file_in, line)) {
labels.push_back(line);
}
GGML_ASSERT(labels.size() == 80);
return true;
}

static bool load_alphabet(std::vector<yolo_image> & alphabet)
{
alphabet.resize(8 * 128);
Expand All @@ -130,6 +195,30 @@ static bool load_alphabet(std::vector<yolo_image> & alphabet)
return true;
}

// Load the label glyph images from files embedded in the GGUF model.
//
// For every j in [0, 8) and every i in [32, 127) the tensor named
// "data/labels/<i>_<j>.png" is looked up in `ctx` and its bytes are decoded
// into alphabet[j*128 + i].  `alphabet` is resized to 8*128 entries first.
// Returns false, after printing a message to stderr, as soon as a tensor is
// missing or cannot be decoded; returns true when all images were loaded.
static bool load_alphabet_gguf(const struct gguf_context * ctx, std::vector<yolo_image> & alphabet)
{
    alphabet.resize(8 * 128);
    for (int j = 0; j < 8; j++) {
        for (int i = 32; i < 127; i++) {
            char fname[256];
            // Bounded formatting: never writes past the end of fname.
            snprintf(fname, sizeof(fname), "data/labels/%d_%d.png", i, j);
            const int tensor = gguf_find_tensor(ctx, fname);
            if (tensor == -1) {
                fprintf(stderr, "Cannot find '%s' in tensor\n", fname);
                return false;
            }
            const size_t offset = gguf_get_tensor_offset(ctx, tensor);
            const size_t len = gguf_get_tensor_size(ctx, tensor);
            const char * data = (const char *)gguf_get_data(ctx);
            // load_image_from_memory takes the length as int; the narrowing
            // conversion is spelled out here.
            if (!load_image_from_memory(data + offset, static_cast<int>(len), alphabet[j*128 + i])) {
                fprintf(stderr, "Cannot load '%s'\n", fname);
                return false;
            }
        }
    }
    return true;
}

static ggml_tensor * apply_conv2d(ggml_context * ctx, ggml_tensor * input, const conv2d_layer & layer)
{
struct ggml_tensor * result = ggml_conv_2d(ctx, layer.weights, input, 1, 1, layer.padding, layer.padding, 1, 1);
Expand Down Expand Up @@ -503,14 +592,20 @@ int main(int argc, char *argv[])
return 1;
}
std::vector<std::string> labels;
if (!load_labels("data/coco.names", labels)) {
fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
return 1;
if (!load_labels_gguf(model.ctx_gguf, "data/coco.names", labels)) {
fprintf(stderr, "%s: skipped loading labels from 'data/coco.names' in model\n", __func__);
if (!load_labels("data/coco.names", labels)) {
fprintf(stderr, "%s: failed to load labels from 'data/coco.names'\n", __func__);
return 1;
}
}
std::vector<yolo_image> alphabet;
if (!load_alphabet(alphabet)) {
fprintf(stderr, "%s: failed to load alphabet\n", __func__);
return 1;
if (!load_alphabet_gguf(model.ctx_gguf, alphabet)) {
fprintf(stderr, "%s: skipped loading alphabet from model\n", __func__);
if (!load_alphabet(alphabet)) {
fprintf(stderr, "%s: failed to load alphabet\n", __func__);
return 1;
}
}
const int64_t t_start_ms = ggml_time_ms();
detect(img, model, params.thresh, labels, alphabet);
Expand Down
Loading
Loading