RocketMQ-06丨消息存储

Posted by jiefang on March 23, 2021

消息存储

消息存储

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
public class DefaultMessageStore implements MessageStore {
    private static final InternalLogger log = InternalLoggerFactory.getLogger(LoggerName.STORE_LOGGER_NAME);
	//消息存储配置属性
    private final MessageStoreConfig messageStoreConfig;
    //CommitLog文件存储实现类
    private final CommitLog commitLog;
	//消息队列缓存表,按消息主题分组
    private final ConcurrentMap<String/* topic */, ConcurrentMap<Integer/* queueId */, ConsumeQueue>> consumeQueueTable;
	//消息队列文件ConsumeQueue刷盘线程
    private final FlushConsumeQueueService flushConsumeQueueService;
	//清除CommitLog文件服务
    private final CleanCommitLogService cleanCommitLogService;
	//清除ConsumeQueue文件服务
    private final CleanConsumeQueueService cleanConsumeQueueService;
	//索引文件实现类
    private final IndexService indexService;
	//MappedFile分配服务
    private final AllocateMappedFileService allocateMappedFileService;
	//CommitLog消息分发,根据CommitLog文件构建ConsumeQueue、IndexFile文件
    private final ReputMessageService reputMessageService;
	//存储HA机制
    private final HAService haService;
	//
    private final ScheduleMessageService scheduleMessageService;

    private final StoreStatsService storeStatsService;
	//消息堆内存缓存
    private final TransientStorePool transientStorePool;

    private final RunningFlags runningFlags = new RunningFlags();
    private final SystemClock systemClock = new SystemClock();

    private final ScheduledExecutorService scheduledExecutorService =
        Executors.newSingleThreadScheduledExecutor(new ThreadFactoryImpl("StoreScheduledThread"));
    private final BrokerStatsManager brokerStatsManager;
    //消息拉取长轮询模式消息达到监听器
    private final MessageArrivingListener messageArrivingListener;
    //Broker配置属性
    private final BrokerConfig brokerConfig;

    private volatile boolean shutdown = true;
	//文件刷盘监测点
    private StoreCheckpoint storeCheckpoint;

    private AtomicLong printTimes = new AtomicLong(0);
	//CommitLog 文件转发请求
    private final LinkedList<CommitLogDispatcher> dispatcherList;

    private RandomAccessFile lockFile;

    private FileLock lock;

    boolean shutDownNormal = false;
}

消息发送存储流程

DefaultMessageStore#putMessage()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
@Override
public PutMessageResult putMessage(MessageExtBrokerInner msg) {
    //如果当前Broker停止工作或Broker SLAVE角色或当前Rocket不支持写入则拒绝消息写入
    PutMessageStatus checkStoreStatus = this.checkStoreStatus();
    if (checkStoreStatus != PutMessageStatus.PUT_OK) {
        return new PutMessageResult(checkStoreStatus, null);
    }
	//校验消息主题和消息属性长度
    PutMessageStatus msgCheckStatus = this.checkMessage(msg);
    if (msgCheckStatus == PutMessageStatus.MESSAGE_ILLEGAL) {
        return new PutMessageResult(msgCheckStatus, null);
    }

    long beginTime = this.getSystemClock().now();
    PutMessageResult result = this.commitLog.putMessage(msg);
    long elapsedTime = this.getSystemClock().now() - beginTime;
    if (elapsedTime > 500) {
        log.warn("not in lock elapsed time(ms)={}, bodyLength={}", elapsedTime, msg.getBody().length);
    }

    this.storeStatsService.setPutMessageEntireTimeMax(elapsedTime);

    if (null == result || !result.isOk()) {
        this.storeStatsService.getPutMessageFailedTimes().incrementAndGet();
    }

    return result;
}

DefaultMessageStore#checkStoreStatus()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
//判断当前Broker是否支持消息写入
private PutMessageStatus checkStoreStatus() {
    if (this.shutdown) {
        log.warn("message store has shutdown, so putMessage is forbidden");
        return PutMessageStatus.SERVICE_NOT_AVAILABLE;
    }

    if (BrokerRole.SLAVE == this.messageStoreConfig.getBrokerRole()) {
        long value = this.printTimes.getAndIncrement();
        if ((value % 50000) == 0) {
            log.warn("broke role is slave, so putMessage is forbidden");
        }
        return PutMessageStatus.SERVICE_NOT_AVAILABLE;
    }

    if (!this.runningFlags.isWriteable()) {
        long value = this.printTimes.getAndIncrement();
        if ((value % 50000) == 0) {
            log.warn("the message store is not writable. It may be caused by one of the following reasons: " +
                "the broker's disk is full, write to logic queue error, write to index file error, etc");
        }
        return PutMessageStatus.SERVICE_NOT_AVAILABLE;
    } else {
        this.printTimes.set(0);
    }

    if (this.isOSPageCacheBusy()) {
        return PutMessageStatus.OS_PAGECACHE_BUSY;
    }
    return PutMessageStatus.PUT_OK;
}

DefaultMessageStore#checkMessage()

1
2
3
4
5
6
7
8
9
10
11
12
13
private PutMessageStatus checkMessage(MessageExtBrokerInner msg) {
    //校验消息主题长度
    if (msg.getTopic().length() > Byte.MAX_VALUE) {
        log.warn("putMessage message topic length too long " + msg.getTopic().length());
        return PutMessageStatus.MESSAGE_ILLEGAL;
    }
	//校验消息属性长度
    if (msg.getPropertiesString() != null && msg.getPropertiesString().length() > Short.MAX_VALUE) {
        log.warn("putMessage message properties length too long " + msg.getPropertiesString().length());
        return PutMessageStatus.MESSAGE_ILLEGAL;
    }
    return PutMessageStatus.PUT_OK;
}

CommitLog#putMessage()

RocketMQ 物理文件的组织方式如图:

Commitlog 文件存储 为${ROCKET_HOME}/store/commitlog 目录,每一个文件默 lG 个文件写满后再创 另外一个,以该文件中第一个偏移量为文件名,偏移量小于 20 位用0补齐,。第一个文件的初始偏移量为0 ,第二文件的 1073741824 ,代表该文件中的第一条消息的物理偏移 1073741824 ,这样根据物理偏移量能快速定位到消息。 MappedFileQueue 可以 是${ROCKET_HOME}/store/commitlog文件夹,而 MappedFile 则对应该文件夹下一个个的文件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
public PutMessageResult putMessage(final MessageExtBrokerInner msg) {
    // Set the storage time
    msg.setStoreTimestamp(System.currentTimeMillis());
    // Set the message body BODY CRC (consider the most appropriate setting
    // on the client)
    msg.setBodyCRC(UtilAll.crc32(msg.getBody()));
    // Back to Results
    AppendMessageResult result = null;

    StoreStatsService storeStatsService = this.defaultMessageStore.getStoreStatsService();

    String topic = msg.getTopic();
    int queueId = msg.getQueueId();

    final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag());
    if (tranType == MessageSysFlag.TRANSACTION_NOT_TYPE
        || tranType == MessageSysFlag.TRANSACTION_COMMIT_TYPE) {
        // Delay Delivery//延迟消息处理
        if (msg.getDelayTimeLevel() > 0) {
            if (msg.getDelayTimeLevel() > this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel()) {
                msg.setDelayTimeLevel(this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel());
            }

            topic = TopicValidator.RMQ_SYS_SCHEDULE_TOPIC;
            queueId = ScheduleMessageService.delayLevel2QueueId(msg.getDelayTimeLevel());

            // Backup real topic, queueId,保存消息原主题和原队列ID至消息属性
            MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_TOPIC, msg.getTopic());
            MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_QUEUE_ID, String.valueOf(msg.getQueueId()));
            msg.setPropertiesString(MessageDecoder.messageProperties2String(msg.getProperties()));
            //消息使用延迟消息主题和延迟消息队列
            msg.setTopic(topic);
            msg.setQueueId(queueId);
        }
    }

    InetSocketAddress bornSocketAddress = (InetSocketAddress) msg.getBornHost();
    if (bornSocketAddress.getAddress() instanceof Inet6Address) {
        msg.setBornHostV6Flag();
    }

    InetSocketAddress storeSocketAddress = (InetSocketAddress) msg.getStoreHost();
    if (storeSocketAddress.getAddress() instanceof Inet6Address) {
        msg.setStoreHostAddressV6Flag();
    }

    long elapsedTimeInLock = 0;

    MappedFile unlockMappedFile = null;
    //获取当前可以写入的CommitLog文件
    MappedFile mappedFile = this.mappedFileQueue.getLastMappedFile();
	//默认使用自旋锁实现
    putMessageLock.lock(); //spin or ReentrantLock ,depending on store config
    try {
        long beginLockTimestamp = this.defaultMessageStore.getSystemClock().now();
        this.beginTimeInLock = beginLockTimestamp;

        // Here settings are stored timestamp, in order to ensure an orderly
        // global
        msg.setStoreTimestamp(beginLockTimestamp);
		//如果当前CommitLog不存在说明是第一次消息发送或者文件已满,创建新的CommitLog
        if (null == mappedFile || mappedFile.isFull()) {
            mappedFile = this.mappedFileQueue.getLastMappedFile(0); // Mark: NewFile may be cause noise
        }
        //仍然为空,创建CommitLog失败
        if (null == mappedFile) {
            log.error("create mapped file1 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
            beginTimeInLock = 0;
            return new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, null);
        }
		//消息追加到MappedFile中
        result = mappedFile.appendMessage(msg, this.appendMessageCallback);
        switch (result.getStatus()) {
            //消息追加成功
            case PUT_OK:
                break;
            //超过文件大小,创建新的CommitLog,重新追加到新的文件中    
            case END_OF_FILE:
                unlockMappedFile = mappedFile;
                // Create a new file, re-write the message
                mappedFile = this.mappedFileQueue.getLastMappedFile(0);
                if (null == mappedFile) {
                    // XXX: warn and notify me
                    log.error("create mapped file2 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
                    beginTimeInLock = 0;
                    return new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, result);
                }
                result = mappedFile.appendMessage(msg, this.appendMessageCallback);
                break;
            case MESSAGE_SIZE_EXCEEDED:
            case PROPERTIES_SIZE_EXCEEDED:
                beginTimeInLock = 0;
                return new PutMessageResult(PutMessageStatus.MESSAGE_ILLEGAL, result);
            case UNKNOWN_ERROR:
                beginTimeInLock = 0;
                return new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result);
            default:
                beginTimeInLock = 0;
                return new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result);
        }

        elapsedTimeInLock = this.defaultMessageStore.getSystemClock().now() - beginLockTimestamp;
        beginTimeInLock = 0;
    } finally {
        //释放锁
        putMessageLock.unlock();
    }

    if (elapsedTimeInLock > 500) {
        log.warn("[NOTIFYME]putMessage in lock cost time(ms)={}, bodyLength={} AppendMessageResult={}", elapsedTimeInLock, msg.getBody().length, result);
    }

    if (null != unlockMappedFile && this.defaultMessageStore.getMessageStoreConfig().isWarmMapedFileEnable()) {
        this.defaultMessageStore.unlockMappedFile(unlockMappedFile);
    }

    PutMessageResult putMessageResult = new PutMessageResult(PutMessageStatus.PUT_OK, result);

    // Statistics
    storeStatsService.getSinglePutMessageTopicTimesTotal(msg.getTopic()).incrementAndGet();
    storeStatsService.getSinglePutMessageTopicSizeTotal(topic).addAndGet(result.getWroteBytes());
	//同步刷盘和异步刷盘
    handleDiskFlush(result, putMessageResult, msg);
    //处理同步刷盘主从复制
    handleHA(result, putMessageResult, msg);

    return putMessageResult;
}
MappedFile#appendMessagesInner()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
public AppendMessageResult appendMessagesInner(final MessageExt messageExt, final AppendMessageCallback cb) {
    assert messageExt != null;
    assert cb != null;

    int currentPos = this.wrotePosition.get();
	//如果currentPos大于或等于文件大小说明文件已写满,抛出异常
    if (currentPos < this.fileSize) {
        //通过slice()创建于MappedFile的共享内存区域,设置position为当前指针
        ByteBuffer byteBuffer = writeBuffer != null ? writeBuffer.slice() : this.mappedByteBuffer.slice();
        byteBuffer.position(currentPos);
        AppendMessageResult result;
        if (messageExt instanceof MessageExtBrokerInner) {
            result = cb.doAppend(this.getFileFromOffset(), byteBuffer, this.fileSize - currentPos, (MessageExtBrokerInner) messageExt);
        } else if (messageExt instanceof MessageExtBatch) {
            result = cb.doAppend(this.getFileFromOffset(), byteBuffer, this.fileSize - currentPos, (MessageExtBatch) messageExt);
        } else {
            return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
        }
        //更新消息队列写入偏移量
        this.wrotePosition.addAndGet(result.getWroteBytes());
        this.storeTimestamp = result.getStoreTimestamp();
        return result;
    }
    log.error("MappedFile.appendMessage return null, wrotePosition: {} fileSize: {}", currentPos, this.fileSize);
    return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
}
CommitLog#doAppend()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
public AppendMessageResult doAppend(final long fileFromOffset, final ByteBuffer byteBuffer, final int maxBlank,
    final MessageExtBrokerInner msgInner) {
    // STORETIMESTAMP + STOREHOSTADDRESS + OFFSET <br>

    // PHY OFFSET
    long wroteOffset = fileFromOffset + byteBuffer.position();

    int sysflag = msgInner.getSysFlag();

    int bornHostLength = (sysflag & MessageSysFlag.BORNHOST_V6_FLAG) == 0 ? 4 + 4 : 16 + 4;
    int storeHostLength = (sysflag & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0 ? 4 + 4 : 16 + 4;
    ByteBuffer bornHostHolder = ByteBuffer.allocate(bornHostLength);
    ByteBuffer storeHostHolder = ByteBuffer.allocate(storeHostLength);

    this.resetByteBuffer(storeHostHolder, storeHostLength);
    String msgId;
    //创建全局唯一消息ID,如果使用IPV4消息ID为16字节,4字节IP,4字节端口号,8字节偏移量
    if ((sysflag & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0) {
        msgId = MessageDecoder.createMessageId(this.msgIdMemory, msgInner.getStoreHostBytes(storeHostHolder), wroteOffset);
    } else {
        //如果使用IPV6消息ID为28字节,16字节IP,4字节端口号,8字节偏移量
        msgId = MessageDecoder.createMessageId(this.msgIdV6Memory, msgInner.getStoreHostBytes(storeHostHolder), wroteOffset);
    }

    // Record ConsumeQueue information
    keyBuilder.setLength(0);
    keyBuilder.append(msgInner.getTopic());
    keyBuilder.append('-');
    keyBuilder.append(msgInner.getQueueId());
    String key = keyBuilder.toString();
    //获取消息主题在消息队列中的偏移量,如果不存在放入
    Long queueOffset = CommitLog.this.topicQueueTable.get(key);
    if (null == queueOffset) {
        queueOffset = 0L;
        CommitLog.this.topicQueueTable.put(key, queueOffset);
    }

    // Transaction messages that require special handling
    //事务消息做更多处理
    final int tranType = MessageSysFlag.getTransactionValue(msgInner.getSysFlag());
    switch (tranType) {
        // Prepared and Rollback message is not consumed, will not enter the
        // consumer queuec
        case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
        case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
            queueOffset = 0L;
            break;
        case MessageSysFlag.TRANSACTION_NOT_TYPE:
        case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
        default:
            break;
    }

    /**
     * Serialize message
     */
    final byte[] propertiesData =
        msgInner.getPropertiesString() == null ? null : msgInner.getPropertiesString().getBytes(MessageDecoder.CHARSET_UTF8);

    final int propertiesLength = propertiesData == null ? 0 : propertiesData.length;

    if (propertiesLength > Short.MAX_VALUE) {
        log.warn("putMessage message properties length too long. length={}", propertiesData.length);
        return new AppendMessageResult(AppendMessageStatus.PROPERTIES_SIZE_EXCEEDED);
    }

    final byte[] topicData = msgInner.getTopic().getBytes(MessageDecoder.CHARSET_UTF8);
    final int topicLength = topicData.length;

    final int bodyLength = msgInner.getBody() == null ? 0 : msgInner.getBody().length;
	//根据消息体长度,主题长度,属性长度计算消息总长度
    final int msgLen = calMsgLength(msgInner.getSysFlag(), bodyLength, topicLength, propertiesLength);

    // Exceeds the maximum message,校验消息长度是否大于最大值
    if (msgLen > this.maxMessageSize) {
        CommitLog.log.warn("message size exceeded, msg total size: " + msgLen + ", msg body size: " + bodyLength
            + ", maxMessageSize: " + this.maxMessageSize);
        return new AppendMessageResult(AppendMessageStatus.MESSAGE_SIZE_EXCEEDED);
    }

    // Determines whether there is sufficient free space
    //如果消息长度+END_FILE_MIN_BLANK_LENGTH大于CommitLog空闲空间,返回END_OF_FILE,Broker重新创建一个新的CommitLog
    if ((msgLen + END_FILE_MIN_BLANK_LENGTH) > maxBlank) {
        this.resetByteBuffer(this.msgStoreItemMemory, maxBlank);
        //CommitLog最少空闲8个字节,4位文件剩余空间,4位魔数
        // 1 TOTALSIZE
        this.msgStoreItemMemory.putInt(maxBlank);
        // 2 MAGICCODE
        this.msgStoreItemMemory.putInt(CommitLog.BLANK_MAGIC_CODE);
        // 3 The remaining space may be any value
        // Here the length of the specially set maxBlank
        final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
        byteBuffer.put(this.msgStoreItemMemory.array(), 0, maxBlank);
        return new AppendMessageResult(AppendMessageStatus.END_OF_FILE, wroteOffset, maxBlank, msgId, msgInner.getStoreTimestamp(),
            queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);
    }

    // Initialization of storage space存储空间初始化
    this.resetByteBuffer(msgStoreItemMemory, msgLen);
    // 1 TOTALSIZE
    this.msgStoreItemMemory.putInt(msgLen);
    // 2 MAGICCODE
    this.msgStoreItemMemory.putInt(CommitLog.MESSAGE_MAGIC_CODE);
    // 3 BODYCRC
    this.msgStoreItemMemory.putInt(msgInner.getBodyCRC());
    // 4 QUEUEID
    this.msgStoreItemMemory.putInt(msgInner.getQueueId());
    // 5 FLAG
    this.msgStoreItemMemory.putInt(msgInner.getFlag());
    // 6 QUEUEOFFSET
    this.msgStoreItemMemory.putLong(queueOffset);
    // 7 PHYSICALOFFSET
    this.msgStoreItemMemory.putLong(fileFromOffset + byteBuffer.position());
    // 8 SYSFLAG
    this.msgStoreItemMemory.putInt(msgInner.getSysFlag());
    // 9 BORNTIMESTAMP
    this.msgStoreItemMemory.putLong(msgInner.getBornTimestamp());
    // 10 BORNHOST
    this.resetByteBuffer(bornHostHolder, bornHostLength);
    this.msgStoreItemMemory.put(msgInner.getBornHostBytes(bornHostHolder));
    // 11 STORETIMESTAMP
    this.msgStoreItemMemory.putLong(msgInner.getStoreTimestamp());
    // 12 STOREHOSTADDRESS
    this.resetByteBuffer(storeHostHolder, storeHostLength);
    this.msgStoreItemMemory.put(msgInner.getStoreHostBytes(storeHostHolder));
    // 13 RECONSUMETIMES
    this.msgStoreItemMemory.putInt(msgInner.getReconsumeTimes());
    // 14 Prepared Transaction Offset
    this.msgStoreItemMemory.putLong(msgInner.getPreparedTransactionOffset());
    // 15 BODY
    this.msgStoreItemMemory.putInt(bodyLength);
    if (bodyLength > 0)
        this.msgStoreItemMemory.put(msgInner.getBody());
    // 16 TOPIC
    this.msgStoreItemMemory.put((byte) topicLength);
    this.msgStoreItemMemory.put(topicData);
    // 17 PROPERTIES
    this.msgStoreItemMemory.putShort((short) propertiesLength);
    if (propertiesLength > 0)
        this.msgStoreItemMemory.put(propertiesData);

    final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
    // Write messages to the queue buffer byte数组放入byteBuffer
    byteBuffer.put(this.msgStoreItemMemory.array(), 0, msgLen);
	//创建AppendMessageResult,只是将消息存储在MappedFile对应的内存映射中,还没有刷盘
    AppendMessageResult result = new AppendMessageResult(AppendMessageStatus.PUT_OK, wroteOffset, msgLen, msgId,
        msgInner.getStoreTimestamp(), queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);

    switch (tranType) {
        case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
        case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
            break;
        case MessageSysFlag.TRANSACTION_NOT_TYPE:
        case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
            // The next update ConsumeQueue information
            CommitLog.this.topicQueueTable.put(key, ++queueOffset);
            break;
        default:
            break;
    }
    return result;
}
AppendMessageStatus
1
2
3
4
5
6
7
8
9
10
11
12
public enum AppendMessageStatus {
    //追加成功
    PUT_OK,
    //超过文件大小
    END_OF_FILE,
    //消息长度超过最大允许长度
    MESSAGE_SIZE_EXCEEDED,
    //消息属性超过最大允许长度;
    PROPERTIES_SIZE_EXCEEDED,
    //未知异常
    UNKNOWN_ERROR,
}

DLedgerCommitLog#putMessage()

基于DLedger技术的CommitLog写入消息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
@Override
public PutMessageResult putMessage(final MessageExtBrokerInner msg) {

    StoreStatsService storeStatsService = this.defaultMessageStore.getStoreStatsService();
    final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag());
    String topic = msg.getTopic();
    setMessageInfo(msg,tranType);

    // Back to Results
    AppendMessageResult appendResult;
    AppendFuture<AppendEntryResponse> dledgerFuture;
    EncodeResult encodeResult;

    putMessageLock.lock(); //spin or ReentrantLock ,depending on store config
    long elapsedTimeInLock;
    long queueOffset;
    try {
        beginTimeInDledgerLock = this.defaultMessageStore.getSystemClock().now();
        encodeResult = this.messageSerializer.serialize(msg);
        queueOffset = getQueueOffsetByKey(encodeResult.queueOffsetKey, tranType);
        encodeResult.setQueueOffsetKey(queueOffset);
        if (encodeResult.status != AppendMessageStatus.PUT_OK) {
            return new PutMessageResult(PutMessageStatus.MESSAGE_ILLEGAL, new AppendMessageResult(encodeResult.status));
        }
        AppendEntryRequest request = new AppendEntryRequest();
        request.setGroup(dLedgerConfig.getGroup());
        request.setRemoteId(dLedgerServer.getMemberState().getSelfId());
        request.setBody(encodeResult.getData());
        dledgerFuture = (AppendFuture<AppendEntryResponse>) dLedgerServer.handleAppend(request);
        if (dledgerFuture.getPos() == -1) {
            return new PutMessageResult(PutMessageStatus.OS_PAGECACHE_BUSY, new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR));
        }
        long wroteOffset = dledgerFuture.getPos() + DLedgerEntry.BODY_OFFSET;

        int msgIdLength = (msg.getSysFlag() & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0 ? 4 + 4 + 8 : 16 + 4 + 8;
        ByteBuffer buffer = ByteBuffer.allocate(msgIdLength);

        String msgId = MessageDecoder.createMessageId(buffer, msg.getStoreHostBytes(), wroteOffset);
        elapsedTimeInLock = this.defaultMessageStore.getSystemClock().now() - beginTimeInDledgerLock;
        appendResult = new AppendMessageResult(AppendMessageStatus.PUT_OK, wroteOffset, encodeResult.getData().length, msgId, System.currentTimeMillis(), queueOffset, elapsedTimeInLock);
        switch (tranType) {
            case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
            case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
                break;
            case MessageSysFlag.TRANSACTION_NOT_TYPE:
            case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
                // The next update ConsumeQueue information
                DLedgerCommitLog.this.topicQueueTable.put(encodeResult.queueOffsetKey, queueOffset + 1);
                break;
            default:
                break;
        }
    } catch (Exception e) {
        log.error("Put message error", e);
        return new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR));
    } finally {
        beginTimeInDledgerLock = 0;
        putMessageLock.unlock();
    }

    if (elapsedTimeInLock > 500) {
        log.warn("[NOTIFYME]putMessage in lock cost time(ms)={}, bodyLength={} AppendMessageResult={}", elapsedTimeInLock, msg.getBody().length, appendResult);
    }

    PutMessageStatus putMessageStatus = PutMessageStatus.UNKNOWN_ERROR;
    try {
        AppendEntryResponse appendEntryResponse = dledgerFuture.get(3, TimeUnit.SECONDS);
        switch (DLedgerResponseCode.valueOf(appendEntryResponse.getCode())) {
            case SUCCESS:
                putMessageStatus = PutMessageStatus.PUT_OK;
                break;
            case INCONSISTENT_LEADER:
            case NOT_LEADER:
            case LEADER_NOT_READY:
            case DISK_FULL:
                putMessageStatus = PutMessageStatus.SERVICE_NOT_AVAILABLE;
                break;
            case WAIT_QUORUM_ACK_TIMEOUT:
                //Do not return flush_slave_timeout to the client, for the ons client will ignore it.
                putMessageStatus = PutMessageStatus.OS_PAGECACHE_BUSY;
                break;
            case LEADER_PENDING_FULL:
                putMessageStatus = PutMessageStatus.OS_PAGECACHE_BUSY;
                break;
        }
    } catch (Throwable t) {
        log.error("Failed to get dledger append result", t);
    }

    PutMessageResult putMessageResult = new PutMessageResult(putMessageStatus, appendResult);
    if (putMessageStatus == PutMessageStatus.PUT_OK) {
        // Statistics
        storeStatsService.getSinglePutMessageTopicTimesTotal(msg.getTopic()).incrementAndGet();
        storeStatsService.getSinglePutMessageTopicSizeTotal(topic).addAndGet(appendResult.getWroteBytes());
    }
    return putMessageResult;
}
DLedgerServer#handleAppend()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
//     * Handle the append requests:
//     * 1.append the entry to local store
//     * 2.submit the future to entry pusher and wait the quorum ack
//     * 3.if the pending requests are full, then reject it immediately
public CompletableFuture<AppendEntryResponse> handleAppend(AppendEntryRequest request) throws IOException {
    try {
        PreConditions.check(memberState.getSelfId().equals(request.getRemoteId()), DLedgerResponseCode.UNKNOWN_MEMBER, "%s != %s", request.getRemoteId(), memberState.getSelfId());
        PreConditions.check(memberState.getGroup().equals(request.getGroup()), DLedgerResponseCode.UNKNOWN_GROUP, "%s != %s", request.getGroup(), memberState.getGroup());
        PreConditions.check(memberState.isLeader(), DLedgerResponseCode.NOT_LEADER);
        PreConditions.check(memberState.getTransferee() == null, DLedgerResponseCode.LEADER_TRANSFERRING);
        long currTerm = memberState.currTerm();
        if (dLedgerEntryPusher.isPendingFull(currTerm)) {
            AppendEntryResponse appendEntryResponse = new AppendEntryResponse();
            appendEntryResponse.setGroup(memberState.getGroup());
            appendEntryResponse.setCode(DLedgerResponseCode.LEADER_PENDING_FULL.getCode());
            appendEntryResponse.setTerm(currTerm);
            appendEntryResponse.setLeaderId(memberState.getSelfId());
            return AppendFuture.newCompletedFuture(-1, appendEntryResponse);
        } else {
            if (request instanceof BatchAppendEntryRequest) {
                BatchAppendEntryRequest batchRequest = (BatchAppendEntryRequest) request;
                if (batchRequest.getBatchMsgs() != null && batchRequest.getBatchMsgs().size() != 0) {
                    // record positions to return;
                    long[] positions = new long[batchRequest.getBatchMsgs().size()];
                    DLedgerEntry resEntry = null;
                    // split bodys to append
                    int index = 0;
                    Iterator<byte[]> iterator = batchRequest.getBatchMsgs().iterator();
                    while (iterator.hasNext()) {
                        DLedgerEntry dLedgerEntry = new DLedgerEntry();
                        dLedgerEntry.setBody(iterator.next());
                        resEntry = dLedgerStore.appendAsLeader(dLedgerEntry);
                        positions[index++] = resEntry.getPos();
                    }
                    // only wait last entry ack is ok
                    BatchAppendFuture<AppendEntryResponse> batchAppendFuture =
                            (BatchAppendFuture<AppendEntryResponse>) dLedgerEntryPusher.waitAck(resEntry, true);
                    batchAppendFuture.setPositions(positions);
                    return batchAppendFuture;
                }
                throw new DLedgerException(DLedgerResponseCode.REQUEST_WITH_EMPTY_BODYS, "BatchAppendEntryRequest" +
                        " with empty bodys");
            } else {
                DLedgerEntry dLedgerEntry = new DLedgerEntry();
                dLedgerEntry.setBody(request.getBody());
                DLedgerEntry resEntry = dLedgerStore.appendAsLeader(dLedgerEntry);
                return dLedgerEntryPusher.waitAck(resEntry, false);
            }
        }
    } catch (DLedgerException e) {
        logger.error("[{}][HandleAppend] failed", memberState.getSelfId(), e);
        AppendEntryResponse response = new AppendEntryResponse();
        response.copyBaseInfo(request);
        response.setCode(e.getCode().getCode());
        response.setLeaderId(memberState.getLeaderId());
        return AppendFuture.newCompletedFuture(-1, response);
    }
}