说明
在数据切分处理中,特别是水平切粉,中间件最重要的两个过程就是数据切分和聚合.选择合适的切粉规则决定了数据聚合的难易程度,甚至可以避免夸库的数据聚合处理.
全局表
当业务中有些数据类似于数字字典,比如配置文件的配置,常用业务的配置或者数据量不大很少变动的表,并且大部分的业务场景都会用到,那么这种表适合于全局表.不需要对数据进行切粉.在每个分片上都保存一份数据即可.mycat在join操作中.业务表与全局表进行join聚合优先会选择先同分片内的全局表join,避免夸库join.
在进行数据插入操作时.mycat会将数据分发到全局表对应的所有分片执行.
在进行数据读取时会随机获取一个节点读取数据.
使用
在两个分片中创建全局表
-- 在node1(125~127其中一个),和node2(128~130其中一个)创建配置表
CREATE TABLE `blog_config` (
`id` varchar(64) NOT NULL,
`name` varchar(255) NULL,
`value` varchar(255) NULL,
primary key(id)
)charset="utf8";
mycat添加虚拟表
<!-- 设置全局表 -->
<!-- vi conf/schema.xml -->
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="blog_contents" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true"/>
<table name="blog_user" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true"/>
<table name="blog_links" dataNode="dn1" primaryKey="id" autoIncrement="true" splitTableNames ="true"/>
<table name="blog_links" dataNode="dn1.dn2" type="global" primaryKey="id" autoIncrement="true" splitTableNames ="true"/>
</schema>
测试
-- mycat 插入数据
insert into blog_config(name,value) values('test','test');
-- 节点1(125) 查看
select * from blog_config;
+---------------------+------+-------+
| id | name | value |
+---------------------+------+-------+
| 8138463572327956546 | test | test |
+---------------------+------+-------+
-- 节点2(130) 查看
select * from blog_config;
+---------------------+------+-------+
| id | name | value |
+---------------------+------+-------+
| 8138463572327956546 | test | test |
+---------------------+------+-------+
分片表
对于文章内容与文章评论两个表来说, 评论表依赖于文章表.即存现表的主从关系,抽象出父子关系表,也就是文章内容根据文章主键id切分到不同的分片中,文章主键id对应的评论会与其分配到同一节点.避免数据夸库join操作.
数据表
-- 博客内容表
CREATE TABLE `blog_contents` (
`id` int(10) unsigned NOT NULL,
`title` varchar(150) DEFAULT '',
`text` longtext,
`image` varchar(255) DEFAULT '',
`order` int(10) unsigned DEFAULT '0',
`is_delete` tinyint(1) DEFAULT '0',
`create_time` int(11) DEFAULT '0',
`update_time` int(11) DEFAULT '0',
primary key (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- 博客评论表
CREATE TABLE `blog_comments` (
`id` int(10) unsigned NOT NULL COMMENT '评论id',
`parent_id` int(10) unsigned NOT NULL DEFAULT 0 COMMENT '回复评论id',
`content_id` int(10) NOT NULL COMMENT '内容id',
`user_id` int(10) NOT NULL COMMENT '用户id',
`content` varchar(255) NOT NULL COMMENT '评论内容',
`order` int(10) unsigned DEFAULT '0',
`is_delete` tinyint(1) DEFAULT '0',
`create_time` int(11) DEFAULT '0',
`update_time` int(11) DEFAULT '0',
primary key (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
使用
设置主键
sequnceHandlerType=4时修改如下配置.
# conf/sequence_conf.properties
BLOG_LINKS.HISIDS=
BLOG_LINKS.MINID=0
BLOG_LINKS.CURID=0
BLOG_LINKS.MAXID=0
BLOG_CONTENTS.HISIDS=
BLOG_CONTENTS.MINID=0
BLOG_CONTENTS.CURID=0
BLOG_CONTENTS.MAXID=0
BLOG_COMMENTS.HISIDS=
BLOG_COMMENTS.MINID=0
BLOG_COMMENTS.CURID=0
BLOG_COMMENTS.MAXID=0
设置虚拟表
<!-- 设置全局表 -->
<!-- vi conf/schema.xml -->
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="blog_contents" dataNode="dn1,dn2" rule="mod-long" splitTableNames ="true">
<childTable name="blog_comments" primaryKey="id" joinKey="content_id" parentKey="id" autoIncrement="true" />
</table>
</schema>
插入文章数据
insert into blog_contents
(title)
values
('mycat-入门'),
('mycat-主键'),
('mycat-分片');
select * from blog_contents;
+----+--------------+------+-------+-------+-----------+-------------+-------------+
| id | title | text | image | order | is_delete | create_time | update_time |
+----+--------------+------+-------+-------+-----------+-------------+-------------+
| 4 | mycat-主键 | NULL | | 0 | 0 | 0 | 0 |
| 3 | mycat-入门 | NULL | | 0 | 0 | 0 | 0 |
| 5 | mycat-分片 | NULL | | 0 | 0 | 0 | 0 |
+----+--------------+------+-------+-------+-----------+-------------+-------------+
插入评论数据
ChildTable multi insert not provided
,拆成单条语句即可insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,3,1,'入门评论3');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,1,'主键评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,2,'主键评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,4,2,'主键评论3');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,2,'分片评论1');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,2,'分片评论2');
insert into blog_comments (parent_id,content_id,user_id,content) values (0,5,1,'分片评论3');
节点一查看数据(125~127)
select id,title from blog_contents;
+----+--------------+
| id | title |
+----+--------------+
| 3 | mycat-入门 |
| 5 | mycat-分片 |
+----+--------------+
select content_id, content from blog_comments;
+------------+---------------+
| content_id | content |
+------------+---------------+
| 3 | 入门评论1 |
| 3 | 入门评论2 |
| 3 | 入门评论3 |
| 5 | 分片评论1 |
| 5 | 分片评论2 |
| 5 | 分片评论3 |
+------------+---------------+
节点二查看数据(128~130)
select id,title from blog_contents;
+----+--------------+
| id | title |
+----+--------------+
| 4 | mycat-主键 |
+----+--------------+
select content_id, content from blog_comments;
+------------+---------------+
| content_id | content |
+------------+---------------+
| 4 | 主键评论1 |
| 4 | 主键评论2 |
| 4 | 主键评论3 |
+------------+---------------+
常用分片规则
字段取模
配置
增加自定义方法: vi conf/rule.xml
, 原方法: mod-long
<!-- 仿照mod-lang方法 创建自己的分片规则 -->
<function name="my-mod-long" class="io.mycat.route.function.PartitionByMod">
<!-- 2个节点, 即对主键id除以2进行求模 -->
<property name="count">2</property>
</function>
增加自定义规则: vi conf/rule.xml
, 原规则: mod-long
<tableRule name="custom-mod-long">
<rule>
<!-- 主键字段为user_id -->
<columns>user_id</columns>
<!-- 使用自定义的mod-long方法 -->
<algorithm>my-mod-long</algorithm>
</rule>
</tableRule>
演示
在两个mysql节点中创建数据表
CREATE TABLE `mod_long_test` (
`user_id` int(0) NOT NULL,
`user_name` varchar(255) NOT NULL DEFAULT '',
PRIMARY KEY (user_id)
)DEFAULT CHARSET=utf8;
设置mycat虚拟表
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
...
<table name="mod_long_test" dataNode="dn1,dn2" rule="custom-mod-long" primaryKey="user_id" autoIncrement="true" splitTableNames ="true"/>
</schema>
mycat插入数据
insert into mod_long_test (`user_name`)
values
('test1'),('test2'),('test3'),('test4');
进入节点查看数据
-- node1
select * from blog.mod_long_test;
+---------+-----------+
| user_id | user_name |
+---------+-----------+
| 2 | test1 |
| 4 | test3 |
+---------+-----------+
-- node2
select * from blog.mod_long_test;
+---------+-----------+
| user_id | user_name |
+---------+-----------+
| 3 | test2 |
| 5 | test4 |
+---------+-----------+
分片枚举
配置
通过配置文件定义枚举id , vi conf/test_custom_enum.txt
定义枚举配置文件说明:
- 分片1: 北京(10),天津(22).
- 分片2: 上海(21),杭州(571).
10=0
21=1
22=0
571=1
增加自定义方法: vi conf/rule.xml
, 原方法: hash-int
<function name="my-hash-int"
class="io.mycat.route.function.PartitionByFileMap">
<!-- 自定义的分片文件,在mycat配置目录下 -->
<property name="mapFile">test_custom_enum.txt</property>
</function>
增加自定义规则: vi conf/rule.xml
, 原规则: sharding-by-intfile
<tableRule name="custom-sharding-by-intfile">
<rule>
<!-- 自定义分片字段 -->
<columns>city_code</columns>
<!-- 自定义分片方法 -->
<algorithm>my-hash-int</algorithm>
</rule>
</tableRule>
演示
两个mysql节点中创建数据表
CREATE TABLE `enum_test` (
`id` int(0) auto_increment comment '为了方便省时使用数据库本地自增',
`city_code` int(0) NOT NULL,
`content` varchar(255) NOT NULL DEFAULT '',
primary key (`id`)
)DEFAULT CHARSET=utf8;
mycat设置虚拟表
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="enum_test" dataNode="dn1,dn2" rule="custom-sharding-by-intfile" splitTableNames ="true"/>
</schema>
mycat插入数据 ,不支持批量插入
insert into enum_test (city_code,content) values (10, '北京内容');
insert into enum_test (city_code,content) values (571, '杭州内容');
insert into enum_test (city_code,content) values (21, '上海内容');
insert into enum_test (city_code,content) values (22, '天津内容');
进入节点查看数据
-- node1
select * from blog.enum_test;
+----+-----------+--------------+
| id | city_code | content |
+----+-----------+--------------+
| 1 | 10 | 北京内容 |
| 4 | 22 | 天津内容 |
+----+-----------+--------------+
-- node2
select * from blog.enum_test;
+----+-----------+--------------+
| id | city_code | content |
+----+-----------+--------------+
| 2 | 571 | 杭州内容 |
| 5 | 21 | 上海内容 |
+----+-----------+--------------+
范围分片
配置
通过配置文件定义范围分片, vi conf/test_range.txt
# K=1000,M=10000, 原配置文件中这么写的.0-15000, 15000-20000分片
0-15K=0
15K-2M=1
增加自定义方法: vi conf/rule.xml
, 原方法: rang-long
<function name="my-rang-long"
class="io.mycat.route.function.AutoPartitionByLong">
<property name="mapFile">test_range.txt</property>
</function>
增加自定义规则: vi conf/rule.xml
.原规则: auto-sharding-long
<tableRule name="custom-auto-sharding-long">
<rule>
<!-- 自定义分片字段 -->
<columns>id</columns>
<!-- 自定义分片方法 -->
<algorithm>my-rang-long</algorithm>
</rule>
</tableRule>
演示
两个数据节点创建数据表
CREATE TABLE `auto_sharding` (
`id` int(0),
primary key (`id`)
)DEFAULT CHARSET=utf8;
mycat创建虚拟表
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="auto_sharding" dataNode="dn1,dn2" rule="custom-auto-sharding-long" splitTableNames ="true"/>
</schema>
mycat插入数据
insert into auto_sharding (id)
values
(1),(1400),(15000),(15200);
节点查看数据
-- node1
select * from blog.auto_sharding;
+-------+
| id |
+-------+
| 1 |
| 1400 |
| 15000 |
+-------+
-- node2
select * from blog.auto_sharding;
+-------+
| id |
+-------+
| 15200 |
+-------+
日期分片
配置
增加自定义方法: vi conf/rule.xml
原方法: partbyday
<function name="my-partbyday"
class="io.mycat.route.function.PartitionByDate">
<!-- 自定义日期格式 -->
<property name="dateFormat">yyyy-MM-dd</property>
<!-- 自然日 不知道干啥的 -->
<property name="sNaturalDay">0</property>
<!-- 开始日期 -->
<property name="sBeginDate">2021-04-01</property>
<!-- 分区天数,即默认从开始日期算起 10天一个分区 -->
<property name="sPartionDay">10</property>
</function>
增加自定义规则: vi conf/rule.xml
原规则: sharding-by-date
<tableRule name="custom-sharding-by-date">
<rule>
<!-- 自定义分片字段 -->
<columns>create_time</columns>
<!-- 自定义分片方法 -->
<algorithm>my-partbyday</algorithm>
</rule>
</tableRule>
演示
两个节点创建数据表
CREATE TABLE `test_partbyday` (
`id` int(0),
`create_time` datetime,
primary key (`id`)
)DEFAULT CHARSET=utf8;
mycat创建虚拟表
<schema name="test_partbyday" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="test_partbyday" dataNode="dn1,dn2" rule="custom-sharding-by-date" splitTableNames ="true"/>
</schema>
mycat 插入数据
-- 出现在节点1
insert into test_partbyday (id,create_time) value (1, "2021-04-01 01:12:13");
insert into test_partbyday (id,create_time) value (2, "2021-04-09 01:12:13");
insert into test_partbyday (id,create_time) value (3, "2021-04-10 01:12:13");
-- 出现在节点2
insert into test_partbyday (id,create_time) value (4, "2021-04-15 01:12:13");
-- 出现异常
insert into test_partbyday (id,create_time) value (5, "2021-04-21 01:12:13");
ERROR 1064 (HY000): Can't find a valid data node for specified node index :TEST_PARTBYDAY -> CREATE_TIME -> 2021-04-21 01:12:13 -> Index : 2
没设置分片结束日期的时候: 当节点号 = (插入时间 - 分片开始时间) / 分区天数 > 分片数
, 按常理会重新循环节点插入. 结果节点号=3
的时候并没有转写到节点1中.
2021-04-20
. 理论成立,开始实践结束日期 = (开始日期-1) / 分片天数 * 节点数量
. 修改自定义日期分片规则, 修改后如下:
<function name="my-partbyday"
class="io.mycat.route.function.PartitionByDate">
<property name="dateFormat">yyyy-MM-dd</property>
<property name="sNaturalDay">0</property>
<property name="sBeginDate">2021-04-01</property>
<!-- 新增结束日期 -->
<property name="sEndDate">2021-04-20</property>
<property name="sPartionDay">10</property>
</function>
继续测试插入数据. 注释意思为插入的数据存储的节点.不是在节点上执行,命令均在mycat中执行.
-- 节点1
insert into test_partbyday (id,create_time) value (5, "2021-04-21 01:12:13");
insert into test_partbyday (id,create_time) value (6, "2021-04-22 01:12:13");
insert into test_partbyday (id,create_time) value (7, "2021-04-22 11:12:13");
-- 节点2
insert into test_partbyday (id,create_time) value (8, "2021-05-08 01:12:13");
insert into test_partbyday (id,create_time) value (9, "2021-05-09 01:12:13");
insert into test_partbyday (id,create_time) value (10, "2021-05-10 01:12:13");
自然月分片
配置没有结束时间,不会循环插入.所以只能一年一配置
配置
查看方法: vi conf/rule.xml
<tableRule name="sharding-by-month">
<rule>
<columns>create_time</columns>
<algorithm>partbymonth</algorithm>
</rule>
</tableRule>
查看规则: vi conf/rule.xml
<function name="partbymonth"
class="io.mycat.route.function.PartitionByMonth">
<property name="dateFormat">yyyy-MM-dd</property>
<!-- 注意插入的值年份要和这里的年份一样 -->
<property name="sBeginDate">2021-01-01</property>
</function>
演示
创建12个月节点: vi conf/schema.xml
<schema name="blog" checkSQLschema="true" sqlMaxLimit="100" randomDataNode="dn1">
<table name="blog_orders" dataNode="order_dn$1-12" rule="sharding-by-month" splitTableNames ="true"/>
</schema>
<dataNode name="order_dn1" dataHost="pxc1" database="blog_order1" />
<dataNode name="order_dn2" dataHost="pxc1" database="blog_order2" />
<dataNode name="order_dn3" dataHost="pxc1" database="blog_order3" />
<dataNode name="order_dn4" dataHost="pxc1" database="blog_order4" />
<dataNode name="order_dn5" dataHost="pxc1" database="blog_order5" />
<dataNode name="order_dn6" dataHost="pxc1" database="blog_order6" />
<dataNode name="order_dn7" dataHost="pxc2" database="blog_order7" />
<dataNode name="order_dn8" dataHost="pxc2" database="blog_order8" />
<dataNode name="order_dn9" dataHost="pxc2" database="blog_order9" />
<dataNode name="order_dn10" dataHost="pxc2" database="blog_order10" />
<dataNode name="order_dn11" dataHost="pxc2" database="blog_order11" />
<dataNode name="order_dn12" dataHost="pxc2" database="blog_order12" />
进入节点1创建blog_order1~6, 节点2创建blog_order7~12. 并创建数据表
-- 创建库
create database blog_order1;
...
-- 创建表
CREATE TABLE `blog_orders` (
`user_id` int(0) NOT NULL,
`create_time` varchar(255) NOT NULL DEFAULT '',
PRIMARY KEY (user_id)
)DEFAULT CHARSET=utf8;
mycat 插入数据测试
insert into blog_orders (user_id, create_time) value (1, "2021-01-01");
insert into blog_orders (user_id, create_time) value (2, "2021-02-01");
insert into blog_orders (user_id, create_time) value (3, "2021-03-01");
insert into blog_orders (user_id, create_time) value (4, "2021-04-01");
insert into blog_orders (user_id, create_time) value (5, "2021-05-01");
insert into blog_orders (user_id, create_time) value (6, "2021-06-01");
insert into blog_orders (user_id, create_time) value (7, "2021-07-01");
insert into blog_orders (user_id, create_time) value (8, "2021-08-01");
insert into blog_orders (user_id, create_time) value (9, "2021-09-01");
insert into blog_orders (user_id, create_time) value (10, "2021-10-01");
insert into blog_orders (user_id, create_time) value (11, "2021-11-01");
insert into blog_orders (user_id, create_time) value (12, "2021-12-01");
insert into blog_orders (user_id, create_time) value (13, "2021-12-01 23:01:01");
节点查看数据
mysql> select * from blog_order12.blog_orders;
+---------+---------------------+
| user_id | create_time |
+---------+---------------------+
| 12 | 2021-12-01 |
| 13 | 2021-12-01 23:01:01 |
+---------+---------------------+
mysql> select * from blog_order7.blog_orders;
+---------+-------------+
| user_id | create_time |
+---------+-------------+
| 7 | 2021-07-01 |
+---------+-------------+