说明

在数据切分处理中,特别是水平切粉,中间件最重要的两个过程就是数据切分和聚合.选择合适的切粉规则决定了数据聚合的难易程度,甚至可以避免夸库的数据聚合处理.

全局表

当业务中有些数据类似于数字字典,比如配置文件的配置,常用业务的配置或者数据量不大很少变动的表,并且大部分的业务场景都会用到,那么这种表适合于全局表.不需要对数据进行切粉.在每个分片上都保存一份数据即可.

mycat在join操作中.业务表与全局表进行join聚合优先会选择先同分片内的全局表join,避免夸库join.

在进行数据插入操作时.mycat会将数据分发到全局表对应的所有分片执行.

在进行数据读取时会随机获取一个节点读取数据.

使用

在两个分片中创建全局表

-- 在node1(125~127其中一个),和node2(128~130其中一个)创建配置表
CREATE TABLE `blog_config`  (
  `id` varchar(64) NOT NULL,
  `name` varchar(255) NULL,
  `value` varchar(255) NULL,
  primary key(id)
)charset="utf8";

mycat添加虚拟表

<!-- 设置全局表 -->
<!-- vi conf/schema.xml -->
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
    <table name="blog_contents" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true"/>
    <table name="blog_user" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true"/>
    <table name="blog_links" dataNode="dn1" primaryKey="id" autoIncrement="true" splitTableNames ="true"/>
    <table name="blog_links" dataNode="dn1.dn2" type="global"  primaryKey="id" autoIncrement="true" splitTableNames ="true"/>
</schema>

测试

-- mycat 插入数据
insert into blog_config(name,value) values('test','test');

-- 节点1(125) 查看
select * from blog_config;
+---------------------+------+-------+
| id                  | name | value |
+---------------------+------+-------+
| 8138463572327956546 | test | test  |
+---------------------+------+-------+
-- 节点2(130) 查看
select * from blog_config;
+---------------------+------+-------+
| id                  | name | value |
+---------------------+------+-------+
| 8138463572327956546 | test | test  |
+---------------------+------+-------+

分片表

对于文章内容与文章评论两个表来说, 评论表依赖于文章表.即存现表的主从关系,抽象出父子关系表,也就是文章内容根据文章主键id切分到不同的分片中,文章主键id对应的评论会与其分配到同一节点.避免数据夸库join操作.

数据表

-- 博客内容表
CREATE TABLE `blog_contents` (
  `id` int(10) unsigned NOT NULL,
  `title` varchar(150) DEFAULT '',
  `text` longtext,
  `image` varchar(255) DEFAULT '',
  `order` int(10) unsigned DEFAULT '0',
  `is_delete` tinyint(1) DEFAULT '0',
  `create_time` int(11) DEFAULT '0',
  `update_time` int(11) DEFAULT '0',
  primary key (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


-- 博客评论表
CREATE TABLE `blog_comments` (
  `id` int(10) unsigned NOT NULL COMMENT '评论id',
  `parent_id` int(10) unsigned NOT NULL DEFAULT 0 COMMENT '回复评论id',
    `content_id` int(10) NOT NULL COMMENT '内容id',
  `user_id` int(10) NOT NULL COMMENT '用户id',
  `content` varchar(255) NOT NULL COMMENT '评论内容',
  `order` int(10) unsigned DEFAULT '0',
  `is_delete` tinyint(1) DEFAULT '0',
  `create_time` int(11) DEFAULT '0',
  `update_time` int(11) DEFAULT '0',
  primary key (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

使用

设置主键

sequnceHandlerType=4时修改如下配置.

# conf/sequence_conf.properties
BLOG_LINKS.HISIDS=
BLOG_LINKS.MINID=0
BLOG_LINKS.CURID=0
BLOG_LINKS.MAXID=0
BLOG_CONTENTS.HISIDS=
BLOG_CONTENTS.MINID=0
BLOG_CONTENTS.CURID=0
BLOG_CONTENTS.MAXID=0
BLOG_COMMENTS.HISIDS=
BLOG_COMMENTS.MINID=0
BLOG_COMMENTS.CURID=0
BLOG_COMMENTS.MAXID=0

设置虚拟表

<!-- 设置全局表 -->
<!-- vi conf/schema.xml -->
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
    <table name="blog_contents" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true">
        <childTable name="blog_comments" primaryKey="id" joinKey="content_id" parentKey="id" autoIncrement="true" />
    </table>
</schema>

插入文章数据

mycat中执行

insert into blog_contents
(title)
values
('mycat-入门'),
('mycat-主键'),
('mycat-分片');

select * from blog_contents;
+----+--------------+------+-------+-------+-----------+-------------+-------------+
| id | title        | text | image | order | is_delete | create_time | update_time |
+----+--------------+------+-------+-------+-----------+-------------+-------------+
|  4 | mycat-主键   | NULL |       |     0 |         0 |           0 |           0 |
|  3 | mycat-入门   | NULL |       |     0 |         0 |           0 |           0 |
|  5 | mycat-分片   | NULL |       |     0 |         0 |           0 |           0 |
+----+--------------+------+-------+-------+-----------+-------------+-------------+

插入评论数据

mycat中执行

多条语句出现错误ChildTable multi insert not provided,拆成单条语句即可

insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论3');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,1,'主键评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,2,'主键评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,2,'主键评论3');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,2,'分片评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,2,'分片评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,1,'分片评论3');

节点一查看数据(125~127)

select id,title from blog_contents;
+----+--------------+
| id | title        |
+----+--------------+
|  3 | mycat-入门   |
|  5 | mycat-分片   |
+----+--------------+

select content_id, content from blog_comments;
+------------+---------------+
| content_id | content       |
+------------+---------------+
|          3 | 入门评论1     |
|          3 | 入门评论2     |
|          3 | 入门评论3     |
|          5 | 分片评论1     |
|          5 | 分片评论2     |
|          5 | 分片评论3     |
+------------+---------------+

节点二查看数据(128~130)

select id,title from blog_contents;
+----+--------------+
| id | title        |
+----+--------------+
|  4 | mycat-主键   |
+----+--------------+

select content_id, content from blog_comments;
+------------+---------------+
| content_id | content       |
+------------+---------------+
|          4 | 主键评论1     |
|          4 | 主键评论2     |
|          4 | 主键评论3     |
+------------+---------------+

常用分片规则

字段取模

配置

增加自定义方法: vi conf/rule.xml, 原方法: mod-long

<!-- 仿照mod-lang方法 创建自己的分片规则 -->
<function name="my-mod-long" class="io.mycat.route.function.PartitionByMod">
        <!-- 2个节点, 即对主键id除以2进行求模 -->
        <property name="count">2</property>
</function>

增加自定义规则: vi conf/rule.xml, 原规则: mod-long

<tableRule name="custom-mod-long">
        <rule>
                      <!-- 主键字段为user_id -->
                <columns>user_id</columns>
                      <!-- 使用自定义的mod-long方法 -->
                <algorithm>my-mod-long</algorithm>
        </rule>
</tableRule>

演示

在两个mysql节点中创建数据表

CREATE TABLE `mod_long_test`  (
  `user_id` int(0) NOT NULL,
  `user_name` varchar(255) NOT NULL DEFAULT '',
    PRIMARY KEY (user_id)
)DEFAULT CHARSET=utf8;

设置mycat虚拟表

<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
        ...
        <table name="mod_long_test" dataNode="dn1,dn2" rule="custom-mod-long"  primaryKey="user_id" autoIncrement="true" splitTableNames ="true"/>
</schema>

mycat插入数据

insert into mod_long_test (`user_name`) 
values 
('test1'),('test2'),('test3'),('test4');

进入节点查看数据

-- node1
select * from blog.mod_long_test;
+---------+-----------+
| user_id | user_name |
+---------+-----------+
|       2 | test1     |
|       4 | test3     |
+---------+-----------+

-- node2
select * from blog.mod_long_test;
+---------+-----------+
| user_id | user_name |
+---------+-----------+
|       3 | test2     |
|       5 | test4     |
+---------+-----------+

分片枚举

配置

通过配置文件定义枚举id , vi conf/test_custom_enum.txt

定义枚举配置文件说明:

  • 分片1: 北京(10),天津(22).
  • 分片2: 上海(21),杭州(571).
10=0
21=1
22=0
571=1

增加自定义方法: vi conf/rule.xml, 原方法: hash-int

<function name="my-hash-int"
                          class="io.mycat.route.function.PartitionByFileMap">
              <!-- 自定义的分片文件,在mycat配置目录下 -->
        <property name="mapFile">test_custom_enum.txt</property>
</function>

增加自定义规则: vi conf/rule.xml, 原规则: sharding-by-intfile

<tableRule name="custom-sharding-by-intfile">
        <rule>
                      <!-- 自定义分片字段 --> 
                <columns>city_code</columns>
                      <!-- 自定义分片方法 -->
                <algorithm>my-hash-int</algorithm>
        </rule>
</tableRule>

演示

两个mysql节点中创建数据表

CREATE TABLE `enum_test`  (
  `id` int(0) auto_increment comment '为了方便省时使用数据库本地自增',
  `city_code` int(0) NOT NULL,
  `content` varchar(255) NOT NULL DEFAULT '',
  primary key (`id`)
)DEFAULT CHARSET=utf8;

mycat设置虚拟表

<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
        <table name="enum_test" dataNode="dn1,dn2" rule="custom-sharding-by-intfile" splitTableNames ="true"/>
</schema>

mycat插入数据 ,不支持批量插入

insert into enum_test (city_code,content) values (10, '北京内容');
insert into enum_test (city_code,content) values (571, '杭州内容');
insert into enum_test (city_code,content) values (21, '上海内容');
insert into enum_test (city_code,content) values (22, '天津内容');

进入节点查看数据

-- node1
select * from blog.enum_test;
+----+-----------+--------------+
| id | city_code | content      |
+----+-----------+--------------+
|  1 |        10 | 北京内容     |
|  4 |        22 | 天津内容     |
+----+-----------+--------------+

-- node2
select * from blog.enum_test;
+----+-----------+--------------+
| id | city_code | content      |
+----+-----------+--------------+
|  2 |       571 | 杭州内容     |
|  5 |        21 | 上海内容     |
+----+-----------+--------------+

范围分片

配置

通过配置文件定义范围分片, vi conf/test_range.txt

# K=1000,M=10000, 原配置文件中这么写的.0-15000, 15000-20000分片
0-15K=0
15K-2M=1

增加自定义方法: vi conf/rule.xml, 原方法: rang-long

<function name="my-rang-long"
                          class="io.mycat.route.function.AutoPartitionByLong">
        <property name="mapFile">test_range.txt</property>
</function>

增加自定义规则: vi conf/rule.xml.原规则: auto-sharding-long

<tableRule name="custom-auto-sharding-long">
        <rule>
                      <!-- 自定义分片字段 -->
                <columns>id</columns>
                      <!-- 自定义分片方法 -->
                <algorithm>my-rang-long</algorithm>
        </rule>
</tableRule>

演示

两个数据节点创建数据表

CREATE TABLE `auto_sharding`  (
  `id` int(0),
  primary key (`id`)
)DEFAULT CHARSET=utf8;

mycat创建虚拟表

<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
        <table name="auto_sharding" dataNode="dn1,dn2" rule="custom-auto-sharding-long" splitTableNames ="true"/>
</schema>

mycat插入数据

insert into auto_sharding (id) 
values 
(1),(1400),(15000),(15200);

节点查看数据

-- node1
select * from blog.auto_sharding;
+-------+
| id    |
+-------+
|     1 |
|  1400 |
| 15000 |
+-------+

-- node2
select * from blog.auto_sharding;
+-------+
| id    |
+-------+
| 15200 |
+-------+

日期分片

配置

增加自定义方法: vi conf/rule.xml 原方法: partbyday

<function name="my-partbyday"
                  class="io.mycat.route.function.PartitionByDate">
              <!-- 自定义日期格式 -->
        <property name="dateFormat">yyyy-MM-dd</property>
            <!-- 自然日 不知道干啥的 -->
        <property name="sNaturalDay">0</property>
              <!-- 开始日期 -->
        <property name="sBeginDate">2021-04-01</property>
        <!-- 分区天数,即默认从开始日期算起 10天一个分区 -->
        <property name="sPartionDay">10</property>
</function>

增加自定义规则: vi conf/rule.xml 原规则: sharding-by-date

<tableRule name="custom-sharding-by-date">
        <rule>
                      <!-- 自定义分片字段 -->
                <columns>create_time</columns>
                      <!-- 自定义分片方法 -->
                <algorithm>my-partbyday</algorithm>
        </rule>
</tableRule>

演示

两个节点创建数据表

CREATE TABLE `test_partbyday`  (
  `id` int(0),
  `create_time` datetime,
  primary key (`id`)
)DEFAULT CHARSET=utf8;

mycat创建虚拟表

<schema name="test_partbyday" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
        <table name="test_partbyday" dataNode="dn1,dn2" rule="custom-sharding-by-date" splitTableNames ="true"/>
</schema>

mycat 插入数据

-- 出现在节点1
insert into test_partbyday (id,create_time) value (1, "2021-04-01 01:12:13");
insert into test_partbyday (id,create_time) value (2, "2021-04-09 01:12:13");
insert into test_partbyday (id,create_time) value (3, "2021-04-10 01:12:13");

-- 出现在节点2
insert into test_partbyday (id,create_time) value (4, "2021-04-15 01:12:13");

-- 出现异常
insert into test_partbyday (id,create_time) value (5, "2021-04-21 01:12:13");
ERROR 1064 (HY000): Can't find a valid data node for specified node index :TEST_PARTBYDAY -> CREATE_TIME -> 2021-04-21 01:12:13 -> Index : 2

插入异常原因猜测:

没设置分片结束日期的时候: 当节点号 = (插入时间 - 分片开始时间) / 分区天数 > 分片数, 按常理会重新循环节点插入. 结果节点号=3的时候并没有转写到节点1中.

异常解决办法:为分片配置设置上结束时间为2021-04-20. 理论成立,开始实践

结束日期配置注意: 结束日期 = (开始日期-1) / 分片天数 * 节点数量.

修改自定义日期分片规则, 修改后如下:

<function name="my-partbyday"
                  class="io.mycat.route.function.PartitionByDate">
        <property name="dateFormat">yyyy-MM-dd</property>
        <property name="sNaturalDay">0</property>
        <property name="sBeginDate">2021-04-01</property>
              <!-- 新增结束日期 -->
        <property name="sEndDate">2021-04-20</property>
        <property name="sPartionDay">10</property>
</function>

继续测试插入数据. 注释意思为插入的数据存储的节点.不是在节点上执行,命令均在mycat中执行.

-- 节点1
insert into test_partbyday (id,create_time) value (5, "2021-04-21 01:12:13");
insert into test_partbyday (id,create_time) value (6, "2021-04-22 01:12:13");
insert into test_partbyday (id,create_time) value (7, "2021-04-22 11:12:13");

-- 节点2
insert into test_partbyday (id,create_time) value (8, "2021-05-08 01:12:13");
insert into test_partbyday (id,create_time) value (9, "2021-05-09 01:12:13");
insert into test_partbyday (id,create_time) value (10, "2021-05-10 01:12:13");

成功~

自然月分片

规则中的方法开始时间很重要! 当插入数据的年份与配置的开始时间年份不一样会出现错误.配置没有结束时间,不会循环插入.所以只能一年一配置

配置

查看方法: vi conf/rule.xml

<tableRule name="sharding-by-month">
        <rule>
                <columns>create_time</columns>
                <algorithm>partbymonth</algorithm>
        </rule>
</tableRule>

查看规则: vi conf/rule.xml

<function name="partbymonth"
                  class="io.mycat.route.function.PartitionByMonth">
        <property name="dateFormat">yyyy-MM-dd</property>
              <!-- 注意插入的值年份要和这里的年份一样 -->
        <property name="sBeginDate">2021-01-01</property>
</function>

演示

创建12个月节点: vi conf/schema.xml

<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
        <table name="blog_orders" dataNode="order_dn$1-12" rule="sharding-by-month" splitTableNames ="true"/>
</schema>

<dataNode name="order_dn1" dataHost="pxc1" database="blog_order1" />
<dataNode name="order_dn2" dataHost="pxc1" database="blog_order2" />
<dataNode name="order_dn3" dataHost="pxc1" database="blog_order3" />
<dataNode name="order_dn4" dataHost="pxc1" database="blog_order4" />
<dataNode name="order_dn5" dataHost="pxc1" database="blog_order5" />
<dataNode name="order_dn6" dataHost="pxc1" database="blog_order6" />
<dataNode name="order_dn7" dataHost="pxc2" database="blog_order7" />
<dataNode name="order_dn8" dataHost="pxc2" database="blog_order8" />
<dataNode name="order_dn9" dataHost="pxc2" database="blog_order9" />
<dataNode name="order_dn10" dataHost="pxc2" database="blog_order10" />
<dataNode name="order_dn11" dataHost="pxc2" database="blog_order11" />
<dataNode name="order_dn12" dataHost="pxc2" database="blog_order12" />

进入节点1创建blog_order1~6, 节点2创建blog_order7~12. 并创建数据表

-- 创建库
create database blog_order1;
...

-- 创建表
CREATE TABLE `blog_orders`  (
  `user_id` int(0) NOT NULL,
  `create_time` varchar(255) NOT NULL DEFAULT '',
    PRIMARY KEY (user_id)
)DEFAULT CHARSET=utf8;

mycat 插入数据测试

insert into blog_orders (user_id, create_time) value (1, "2021-01-01");
insert into blog_orders (user_id, create_time) value (2, "2021-02-01");
insert into blog_orders (user_id, create_time) value (3, "2021-03-01");
insert into blog_orders (user_id, create_time) value (4, "2021-04-01");
insert into blog_orders (user_id, create_time) value (5, "2021-05-01");
insert into blog_orders (user_id, create_time) value (6, "2021-06-01");
insert into blog_orders (user_id, create_time) value (7, "2021-07-01");
insert into blog_orders (user_id, create_time) value (8, "2021-08-01");
insert into blog_orders (user_id, create_time) value (9, "2021-09-01");
insert into blog_orders (user_id, create_time) value (10, "2021-10-01");
insert into blog_orders (user_id, create_time) value (11, "2021-11-01");
insert into blog_orders (user_id, create_time) value (12, "2021-12-01");
insert into blog_orders (user_id, create_time) value (13, "2021-12-01 23:01:01");

节点查看数据

mysql> select * from blog_order12.blog_orders;
+---------+---------------------+
| user_id | create_time         |
+---------+---------------------+
|      12 | 2021-12-01          |
|      13 | 2021-12-01 23:01:01 |
+---------+---------------------+

mysql> select * from blog_order7.blog_orders;
+---------+-------------+
| user_id | create_time |
+---------+-------------+
|       7 | 2021-07-01  |
+---------+-------------+
Last modification:April 18th, 2021 at 02:06 am