Skip to content

Vldbss 2025 #579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/pax_storage_concurrency_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class BenchmarkBase : public Fixture
table_meta_->fields_[1].attr_len_ = 11;
table_meta_->fields_[1].field_id_ = 1;
handler_ = new RecordFileHandler(StorageFormat::PAX_FORMAT);
rc = handler_->init(*buffer_pool_, log_handler_, table_meta_);
rc = handler_->init(*buffer_pool_, log_handler_, table_meta_, nullptr);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to init record file handler. rc=%s", strrc(rc));
throw runtime_error("failed to init record file handler");
Expand Down
2 changes: 1 addition & 1 deletion benchmark/record_manager_concurrency_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class BenchmarkBase : public Fixture
}

handler_ = new RecordFileHandler(StorageFormat::ROW_FORMAT);
rc = handler_->init(*buffer_pool_, log_handler_, nullptr);
rc = handler_->init(*buffer_pool_, log_handler_, nullptr, nullptr);
if (rc != RC::SUCCESS) {
LOG_WARN("failed to init record file handler. rc=%s", strrc(rc));
throw runtime_error("failed to init record file handler");
Expand Down
32 changes: 32 additions & 0 deletions docs/docs/design/miniob-how-to-add-new-datatype.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
title: 如何新增一种数据类型
---

> 本文介绍如何新增一种数据类型。

MiniOB 的数据类型系统采用分层设计,实现集中在 [src/observer/common](../../../src/observer/common) 文件夹下,核心组件包括:
1. Value 类:统一数据操作接口
路径:src/observer/common/value.h
作用:封装实际数据值,提供类型无关的操作方法
2. Type 工具类:特定类型的操作实现
路径:src/observer/common/type/
作用:每种数据类型对应一个工具类,实现具体运算逻辑

以下示例展示 MiniOB 如何处理整数类型数据:
```cpp
// 假设解析器识别到整数 "1"
int val = 1;
Value value(val); // 封装为 Value 对象
// 执行加法运算
Value result;
Value::add(value, value, result); // 调用加法接口
// Value::add 方法内部会根据类型调用对应工具类
// 对于 INT 类型,实际调用代码位于:
// src/observer/common/type/integer_type.cpp
```

# 若要新增一种数据类型(如 DATE),建议按以下步骤开发:
1. 在 src/observer/common/type/attr_type.h 中添加新的类型枚举以及对应类型名
2. 在 src/observer/common/type/data_type.cpp 中添加新的类型实例
3. 在 src/observer/common/type/ 文件夹下,参照现有工具类,实现 DateType 工具类
4. 在 Value 类中增加类型处理逻辑,支持 DATE 类型的分发,并能存储 DATE 类型的值
5. 必要情况下还需要增加新的词法规则(lex_sql.l)以及语法规则(yacc_sql.y),支持新类型关键字
518 changes: 518 additions & 0 deletions docs/docs/design/miniob-realtime-analytic.md

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions docs/docs/how_to_build.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,32 @@ git config --global core.autocrlf false
关于该问题的更多细节,请参考[问题来源](https://ask.oceanbase.com/t/topic/35604437/7)。
关于该问题的进一步分析,请参考[Linux系统下执行sudo命令环境变量失效现象](https://zhuanlan.zhihu.com/p/669332689)。
也可以将cmake所在路径添加到sudo的PATH变量中来解决上述问题,请参考[sudo命令下环境变量实效的解决方法](https://www.cnblogs.com/xiao-xiaoyang/p/17444600.html)。


### 3. Could not find a package configuration file provided by "Libevent"
在执行build.sh脚本时,遇到下面的错误
![cmake error](images/miniob-build-libevent.png)

通常是因为 cmake 版本过高(例如 CMake 4.0 起不再兼容低于 3.5 的 `cmake_minimum_required` 声明),导致 libevent 在 init 阶段没有编译成功。

***解决方法:***

在 [deps/3rd/libevent/CMakeLists.txt](../../deps/3rd/libevent/CMakeLists.txt) 中将 cmake 的最低版本设置

`cmake_minimum_required(VERSION 3.1 FATAL_ERROR)`

改为

`cmake_minimum_required(VERSION 3.1...3.8 FATAL_ERROR)`

之后重新执行
```bash
sudo bash build.sh init
```

如果你成功解决libevent的问题,你大概率会遇到另一个错误:
![cmake error](images/miniob-build-jsoncpp.png)
需要在 [deps/3rd/jsoncpp/jsoncppConfig.cmake.in](../../deps/3rd/jsoncpp/jsoncppConfig.cmake.in) 中将 cmake 策略

`cmake_policy(VERSION 3.0)`

改为

`cmake_policy(VERSION 3.0...3.8)`

之后重新执行
```bash
sudo bash build.sh init
```
Binary file added docs/docs/images/miniob-build-jsoncpp.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/docs/images/miniob-build-libevent.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions src/common/lang/comparator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ int compare_int(void *arg1, void *arg2)
}
}

/**
 * @brief Three-way comparison of two 64-bit signed integers passed by address.
 * @param arg1 Address of the left operand; read as an int64_t.
 * @param arg2 Address of the right operand; read as an int64_t.
 * @return 1 if *arg1 > *arg2, -1 if *arg1 < *arg2, 0 if both are equal.
 */
int compare_int64(void *arg1, void *arg2)
{
  // Keep the full 64-bit width: holding the operands in an `int` would drop the
  // high 32 bits and mis-order any value outside the int range.
  const int64_t v1 = *static_cast<const int64_t *>(arg1);
  const int64_t v2 = *static_cast<const int64_t *>(arg2);
  if (v1 > v2) {
    return 1;
  } else if (v1 < v2) {
    return -1;
  } else {
    return 0;
  }
}

int compare_float(void *arg1, void *arg2)
{
float v1 = *(float *)arg1;
Expand Down
1 change: 1 addition & 0 deletions src/common/lang/comparator.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ See the Mulan PSL v2 for more details. */
namespace common {

int compare_int(void *arg1, void *arg2);
int compare_int64(void *arg1, void *arg2);
int compare_float(void *arg1, void *arg2);
int compare_string(void *arg1, int arg1_max_length, void *arg2, int arg2_max_length);

Expand Down
10 changes: 10 additions & 0 deletions src/observer/common/type/attr_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,13 @@ AttrType attr_type_from_string(const char *s)
}
return AttrType::UNDEFINED;
}

/**
 * @brief Tells whether an attribute type is one of the numeric types.
 * @param type The attribute type to classify.
 * @return true for AttrType::INTS and AttrType::FLOATS, false for every other type.
 */
bool is_numerical_type(AttrType type)
{
  switch (type) {
    case AttrType::INTS:
    case AttrType::FLOATS: return true;
    default: return false;
  }
}

/**
 * @brief Tells whether an attribute type is the character-string type.
 * @param type The attribute type to classify.
 * @return true only for AttrType::CHARS.
 */
bool is_string_type(AttrType type)
{
  if (type != AttrType::CHARS) {
    return false;
  }
  return true;
}
2 changes: 2 additions & 0 deletions src/observer/common/type/attr_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ enum class AttrType

const char *attr_type_to_string(AttrType type);
AttrType attr_type_from_string(const char *s);
bool is_numerical_type(AttrType type);
bool is_string_type(AttrType type);
3 changes: 3 additions & 0 deletions src/observer/common/type/data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ See the Mulan PSL v2 for more details. */
#include "common/type/data_type.h"
#include "common/type/vector_type.h"

// Todo: 实现新数据类型
// your code here

array<unique_ptr<DataType>, static_cast<int>(AttrType::MAXTYPE)> DataType::type_instances_ = {
make_unique<DataType>(AttrType::UNDEFINED),
make_unique<CharType>(),
Expand Down
4 changes: 4 additions & 0 deletions src/observer/common/type/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ See the Mulan PSL v2 for more details. */
#include "common/type/attr_type.h"

class Value;
class Column;

/**
* @brief 定义了数据类型相关的操作,比如比较运算、算术运算等
* @defgroup DataType
* @details 数据类型定义的算术运算中,比如 add、subtract 等,将按照当前数据类型设置最终结果值的类型。
* 参与运算的参数类型不一定相同,不同的类型进行运算是否能够支持需要参考各个类型的实现。
*/

class DataType
{
public:
Expand All @@ -47,6 +49,8 @@ class DataType
*/
virtual int compare(const Value &left, const Value &right) const
{
  // Base implementation: unconditionally reports INT32_MAX; concrete types override it.
  return INT32_MAX;
}

/**
 * @brief Compare element left_idx of column left with element right_idx of column right.
 * @return The base implementation always returns INT32_MAX; concrete types override it.
 */
virtual int compare(const Column &left, const Column &right, int left_idx, int right_idx) const
{
  return INT32_MAX;
}

/**
* @brief 计算 left + right,并将结果保存到 result 中
*/
Expand Down
11 changes: 10 additions & 1 deletion src/observer/common/type/float_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,25 @@ See the Mulan PSL v2 for more details. */
#include "common/value.h"
#include "common/lang/limits.h"
#include "common/value.h"
#include "storage/common/column.h"

/**
 * @brief Compare a FLOATS value with an INTS or FLOATS value.
 * @details Both operands are read through Value::get_float() and handed to
 * common::compare_float, whose result is returned unchanged.
 * @param left  Left operand; asserted to be of type FLOATS.
 * @param right Right operand; asserted to be of type INTS or FLOATS.
 */
int FloatType::compare(const Value &left, const Value &right) const
{
  // Only one check of the left operand is needed; the message names the type actually tested.
  ASSERT(left.attr_type() == AttrType::FLOATS, "left type is not float");
  ASSERT(right.attr_type() == AttrType::INTS || right.attr_type() == AttrType::FLOATS, "right type is not numeric");
  float left_val  = left.get_float();
  float right_val = right.get_float();
  return common::compare_float((void *)&left_val, (void *)&right_val);
}

/**
 * @brief Compare one float element of each column.
 * @details Each column's data() buffer is viewed as an array of float and the elements
 * at left_idx and right_idx are passed by address to common::compare_float.
 * No bounds checking is performed on the indexes here.
 * @param left      Column asserted to be of type FLOATS.
 * @param right     Column asserted to be of type FLOATS.
 * @param left_idx  Element index into left.
 * @param right_idx Element index into right.
 */
int FloatType::compare(const Column &left, const Column &right, int left_idx, int right_idx) const
{
  ASSERT(left.attr_type() == AttrType::FLOATS, "left type is not float");
  ASSERT(right.attr_type() == AttrType::FLOATS, "right type is not float");

  float *lhs_values = (float *)left.data();
  float *rhs_values = (float *)right.data();
  return common::compare_float((void *)(lhs_values + left_idx), (void *)(rhs_values + right_idx));
}

RC FloatType::add(const Value &left, const Value &right, Value &result) const
{
result.set_float(left.get_float() + right.get_float());
Expand Down
1 change: 1 addition & 0 deletions src/observer/common/type/float_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class FloatType : public DataType
virtual ~FloatType() = default;

int compare(const Value &left, const Value &right) const override;
int compare(const Column &left, const Column &right, int left_idx, int right_idx) const override;

RC add(const Value &left, const Value &right, Value &result) const override;
RC subtract(const Value &left, const Value &right, Value &result) const override;
Expand Down
9 changes: 9 additions & 0 deletions src/observer/common/type/integer_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the Mulan PSL v2 for more details. */
#include "common/log/log.h"
#include "common/type/integer_type.h"
#include "common/value.h"
#include "storage/common/column.h"

int IntegerType::compare(const Value &left, const Value &right) const
{
Expand All @@ -28,6 +29,14 @@ int IntegerType::compare(const Value &left, const Value &right) const
return INT32_MAX;
}

/**
 * @brief Compare one int element of each column.
 * @details Each column's data() buffer is viewed as an array of int and the elements
 * at left_idx and right_idx are passed by address to common::compare_int.
 * No bounds checking is performed on the indexes here.
 * @param left      Column asserted to be of type INTS.
 * @param right     Column asserted to be of type INTS.
 * @param left_idx  Element index into left.
 * @param right_idx Element index into right.
 */
int IntegerType::compare(const Column &left, const Column &right, int left_idx, int right_idx) const
{
  ASSERT(left.attr_type() == AttrType::INTS, "left type is not integer");
  ASSERT(right.attr_type() == AttrType::INTS, "right type is not integer");

  int *lhs_values = (int *)left.data();
  int *rhs_values = (int *)right.data();
  return common::compare_int((void *)(lhs_values + left_idx), (void *)(rhs_values + right_idx));
}

RC IntegerType::cast_to(const Value &val, AttrType type, Value &result) const
{
switch (type) {
Expand Down
11 changes: 11 additions & 0 deletions src/observer/common/type/integer_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class IntegerType : public DataType
virtual ~IntegerType() {}

int compare(const Value &left, const Value &right) const override;
int compare(const Column &left, const Column &right, int left_idx, int right_idx) const override;

RC add(const Value &left, const Value &right, Value &result) const override;
RC subtract(const Value &left, const Value &right, Value &result) const override;
Expand All @@ -31,6 +32,16 @@ class IntegerType : public DataType

RC cast_to(const Value &val, AttrType type, Value &result) const override;

/**
 * @brief Cost of casting an integer to the given target type.
 * @param type Target attribute type.
 * @return 0 for INTS, 1 for FLOATS, INT32_MAX for any other type
 *         (presumably meaning "no cast available" - confirm against callers).
 */
int cast_cost(const AttrType type) override
{
  switch (type) {
    case AttrType::INTS: return 0;
    case AttrType::FLOATS: return 1;
    default: return INT32_MAX;
  }
}

RC set_value_from_str(Value &val, const string &data) const override;

RC to_string(const Value &val, string &result) const override;
Expand Down
124 changes: 124 additions & 0 deletions src/observer/common/type/string_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/* Copyright (c) 2021 OceanBase and/or its affiliates. All rights reserved.
miniob is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details. */

#pragma once

#include <iostream>
#include <cstring>

using namespace std;
/**
 * @brief Fixed-size, non-owning string handle with short-string inlining.
 * @details The object is a 32-bit length plus a union. Strings of at most
 * INLINE_LENGTH bytes are copied into the object itself; longer strings are only
 * referenced through a pointer to the caller's buffer. That buffer is never copied
 * or freed here, so it must outlive the string_t and every copy made of it.
 */
struct string_t
{
public:
  /// Largest length (in bytes) that is stored inside the object itself.
  static constexpr int INLINE_LENGTH = 12;

  /// Creates an empty string. The storage is zeroed so that size(), data() and the
  /// destructor never read an indeterminate length.
  string_t() { std::memset(&value, 0, sizeof(value)); }

  /// Creates a string of the given length with zeroed storage: an inlined string is
  /// all-zero bytes, a longer one has a null data pointer until the caller sets it.
  explicit string_t(uint32_t len)
  {
    std::memset(&value, 0, sizeof(value));
    value.inlined.length = len;
  }

  /// Creates a string from `len` bytes at `data` (copied if short, referenced otherwise).
  string_t(const char *data, uint32_t len)
  {
    std::memset(&value, 0, sizeof(value));
    init(data, len);
  }

  /// Creates a string from a NUL-terminated C string; a null pointer yields an empty string.
  string_t(const char *data) : string_t(data, data != nullptr ? static_cast<uint32_t>(std::strlen(data)) : 0u) {}

  /// Creates a string that copies (short) or references (long) the bytes of `value`.
  string_t(const std::string &value) : string_t(value.c_str(), static_cast<uint32_t>(value.size())) {}

  // Copies are shallow on purpose: a long string's copy shares the same external buffer.
  string_t(const string_t &)            = default;
  string_t &operator=(const string_t &) = default;

  ~string_t() { reset(); }

  /**
   * @brief (Re)initialises the string from `len` bytes at `data`.
   * @details Short strings are copied into the zero-padded inline buffer; long strings
   * only store the pointer, so `data` must stay valid while this object is used.
   */
  void init(const char *data, uint32_t len)
  {
    value.inlined.length = len;
    if (is_inlined()) {
      std::memset(value.inlined.inlined, 0, INLINE_LENGTH);
      // Nothing to copy for an empty string; also never hand a null source to memcpy.
      if (len != 0 && data != nullptr) {
        std::memcpy(value.inlined.inlined, data, len);
      }
    } else {
      value.pointer.ptr = const_cast<char *>(data);
    }
  }

  /// Clears the string to empty. A referenced external buffer is dropped, not freed.
  void reset()
  {
    if (is_inlined()) {
      std::memset(value.inlined.inlined, 0, INLINE_LENGTH);
    } else {
      value.pointer.ptr = nullptr;
    }
    value.inlined.length = 0;
  }

  /// True when the bytes live inside the object (length <= INLINE_LENGTH).
  bool is_inlined() const
  {
    // Compare as unsigned so a length above INT_MAX is not mistaken for a short string.
    return value.inlined.length <= static_cast<uint32_t>(INLINE_LENGTH);
  }

  /// Read-only pointer to the string bytes (not necessarily NUL-terminated).
  const char *data() const { return is_inlined() ? value.inlined.inlined : value.pointer.ptr; }

  /// Writable pointer to the string bytes; for long strings this is the external buffer.
  char *get_data_writeable() const
  {
    return is_inlined() ? const_cast<char *>(value.inlined.inlined) : value.pointer.ptr;
  }

  /// Length in bytes.
  int size() const { return static_cast<int>(value.inlined.length); }

  bool empty() const { return value.inlined.length == 0; }

  /// Returns an owning std::string copy of the bytes.
  std::string get_string() const { return std::string(data(), value.inlined.length); }

  /// Byte-wise equality: same length and same content.
  bool operator==(const string_t &r) const
  {
    if (value.inlined.length != r.value.inlined.length) {
      return false;
    }
    return std::memcmp(data(), r.data(), value.inlined.length) == 0;
  }

  bool operator!=(const string_t &r) const { return !(*this == r); }

  /// Lexicographic byte order; when one string is a prefix of the other, the longer is greater.
  bool operator>(const string_t &r) const
  {
    const uint32_t left_length  = value.inlined.length;
    const uint32_t right_length = r.value.inlined.length;
    const uint32_t min_length   = std::min<uint32_t>(left_length, right_length);

    const int memcmp_res = std::memcmp(data(), r.data(), min_length);
    return memcmp_res > 0 || (memcmp_res == 0 && left_length > right_length);
  }

  bool operator<(const string_t &r) const { return r > *this; }

  /// Layout used for short strings: length followed by the bytes themselves.
  struct Inlined
  {
    uint32_t length;
    char     inlined[INLINE_LENGTH];
  };

  // Both alternatives start with the 32-bit length, so it can be read through either one.
  union
  {
    struct
    {
      uint32_t length;
      char    *ptr;
    } pointer;
    Inlined inlined;
  } value;
};
Loading