From f608d837b73653bd8fec58c994cc84fc8f27d5d6 Mon Sep 17 00:00:00 2001 From: huyubiao Date: Mon, 19 Jun 2023 15:29:11 +0800 Subject: [PATCH] sync patches from upstream --- ...nal-processing-instructions-for-init.patch | 184 +++ ...p-faild-when-.socket-is-in-listening.patch | 136 ++ ...er-correctly-during-reload-or-reexec.patch | 120 ++ ...nection-timeout-causing-event-blocki.patch | 33 + ...-data-in-app-memory-and-db-after-dae.patch | 814 ++++++++++ ...-fix-recycle-the-zombie-of-sysmaster.patch | 117 ++ ...adjusting-the-memory-usage-of-databa.patch | 1306 +++++++++++++++++ ...xStream-instead-TcpListener-to-get-t.patch | 4 +- sysmaster.spec | 31 +- 9 files changed, 2733 insertions(+), 12 deletions(-) create mode 100644 backport-docs-Add-signal-processing-instructions-for-init.patch create mode 100644 backport-fix-.socket-stop-faild-when-.socket-is-in-listening.patch create mode 100644 backport-fix-Clear-buffer-correctly-during-reload-or-reexec.patch create mode 100644 backport-fix-commands-connection-timeout-causing-event-blocki.patch create mode 100644 backport-fix-inconsistent-data-in-app-memory-and-db-after-dae.patch create mode 100644 backport-fix-recycle-the-zombie-of-sysmaster.patch create mode 100644 backport-fix-support-for-adjusting-the-memory-usage-of-databa.patch diff --git a/backport-docs-Add-signal-processing-instructions-for-init.patch b/backport-docs-Add-signal-processing-instructions-for-init.patch new file mode 100644 index 0000000..8841f8f --- /dev/null +++ b/backport-docs-Add-signal-processing-instructions-for-init.patch @@ -0,0 +1,184 @@ +From cef18c1e65333230fc8a9c45219a1c91105026c6 Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Tue, 6 Jun 2023 05:18:09 +0800 +Subject: [PATCH] docs: Add signal processing instructions for init + +--- + core/bin/unit/util/unit_file.rs | 2 +- + docs/man/signal.md | 147 +++++++++++++++++--------------- + 2 files changed, 79 insertions(+), 70 deletions(-) + +diff --git a/core/bin/unit/util/unit_file.rs b/core/bin/unit/util/unit_file.rs +index 0871703..d1f0bba 100644 +--- a/core/bin/unit/util/unit_file.rs ++++ b/core/bin/unit/util/unit_file.rs +@@ -157,7 +157,7 @@ impl UnitFileData { + continue; + } + let real_path = tmp.parent().unwrap().join(real_path); +- let real_path = fs::canonicalize(&real_path).unwrap(); ++ let real_path = fs::canonicalize(real_path).unwrap(); + let path_toml = format!("{}.toml", real_path.to_string_lossy()); + let to = Path::new(&path_toml); + if let Err(e) = std::fs::copy(&real_path, to) { +diff --git a/docs/man/signal.md b/docs/man/signal.md +index 6a61a25..3bf5253 100644 +--- a/docs/man/signal.md ++++ b/docs/man/signal.md +@@ -1,75 +1,84 @@ + # sysmaster的信号处理 + +-sysMaster采用`1+1+N`架构,sysmaster不再作为init进程,因此内核为init进程的信号做的特殊处理不再生效。下表列出了sysMaster与systemd针对不同信号的响应逻辑差异。 ++sysMaster采用`1+1+N`架构,sysmaster不再作为1号进程,因此内核为1号进程的信号做的特殊处理不再生效。下表列出了init(1号进程)、sysmaster、systemd的信号处理方式以及sysMaster与systemd针对不同信号的响应逻辑差异。 + +-|信号|sysmaster|systemd|对外表现是否有差异| +-|-|-|-|-| +-|1) SIGHUP|daemon-reexec|daemon-reload|N| +-|2) SIGINT|start ctrl-alt-del.target|start ctrl-alt-del.target|N| +-|3) SIGQUIT|crash handler|crash handler|N| +-|4) SIGILL|crash handler|crash handler|N| +-|5) SIGTRAP|IGN|DFL|N| +-|6) SIGABRT|crash handler|crash handler|N| +-|7) SIGBUS|crash handler|crash handler|N| +-|8) SIGFPE|crash handler|crash handler|N| +-|9) SIGKILL|DFL|内核主动屏蔽该信号|Y| +-|10) SIGUSR1|IGN|重连dbus|Y| +-|11) SIGSEGV|crash handler|crash handler|N| +-|12) SIGUSR2|IGN|输出所有单元的配置信息|Y| +-|13) SIGPIPE|IGN|IGN|N| +-|14) SIGALRM|IGN|DFL|N| +-|15) SIGTERM|daemon-reexec|daemon-reexec|N| +-|16) SIGSTKFLT|IGN|DFL|N| +-|17) SIGCHLD|子进程回收|子进程回收|N| +-|18) SIGCONT|IGN|DFL|N| +-|19) SIGSTOP|DFL|内核主动屏蔽该信号|Y| +-|20) SIGTSTP|IGN|DFL|N| +-|21) SIGTTIN|IGN|DFL|N| +-|22) SIGTTOU|IGN|DFL|N| +-|23) SIGURG|IGN|DFL|N| +-|24) SIGXCPU|IGN|DFL|N| +-|25) SIGXFSZ|IGN|DFL|N| +-|26) SIGVTALRM|IGN|DFL|N| +-|27) SIGPROF|IGN|DFL|N| +-|28) SIGWINCH|IGN|start kbrequest.target|Y| +-|29) SIGIO|IGN|DFL|N| +-|30) SIGPWR|IGN|start sigpwr.target|Y| +-|31) SIGSYS|IGN|DFL|N| +-|34) SIGRTMIN|IGN|start default.target|Y| +-|35) SIGRTMIN+1|IGN|isolate rescue.target|Y| +-|36) SIGRTMIN+2|IGN|isolate emergency.target|Y| +-|37) SIGRTMIN+3|IGN|start halt.target|Y| +-|38) SIGRTMIN+4|IGN|start poweroff.target|Y| +-|39) SIGRTMIN+5|IGN|start reboot.target|Y| +-|40) SIGRTMIN+6|IGN|start kexec.target|Y| +-|41) SIGRTMIN+7|daemon-reexec|DFL|Y| +-|42) SIGRTMIN+8|IGN|DFL|N| +-|43) SIGRTMIN+9|IGN|DFL|N| +-|44) SIGRTMIN+10|switch root|DFL|Y| +-|45) SIGRTMIN+11|IGN|DFL|N| +-|46) SIGRTMIN+12|IGN|DFL|N| +-|47) SIGRTMIN+13|IGN|Immediate halt|Y| +-|48) SIGRTMIN+14|IGN|Immediate poweroff|Y| +-|49) SIGRTMIN+15|IGN|Immediate reboot|Y| +-|50) SIGRTMAX-14 SIGRTMIN+16|IGN|Immediate kexec|Y| +-|51) SIGRTMAX-13 SIGRTMIN+17|IGN|DFL|N| +-|52) SIGRTMAX-12 SIGRTMIN+18|IGN|DFL|N| +-|53) SIGRTMAX-11 SIGRTMIN+19|IGN|DFL|N| +-|54) SIGRTMAX-10 SIGRTMIN+20|IGN|enable status messages|Y| +-|55) SIGRTMAX-9 SIGRTMIN+21|IGN|disable status messages|Y| +-|56) SIGRTMAX-8 SIGRTMIN+22|IGN|日志级别设为debug|Y| +-|57) SIGRTMAX-7 SIGRTMIN+23|IGN|日志级别设为info|Y| +-|58) SIGRTMAX-6 SIGRTMIN+24|IGN|Immediate exit (仅限于用户模式)|Y| +-|59) SIGRTMAX-5 SIGRTMIN+25|IGN|reexecute manager|Y| +-|60) SIGRTMAX-4 SIGRTMIN+26|IGN|日志输出设为journal-or-kmsg|Y| +-|61) SIGRTMAX-3 SIGRTMIN+27|IGN|日志输出设为console|Y| +-|62) SIGRTMAX-2 SIGRTMIN+28|IGN|日志输出设为kmsg|Y| +-|63) SIGRTMAX-1 SIGRTMIN+29|IGN|日志输出设为syslog-or-kmsg|Y| +-|64) SIGRTMAX SIGRTMIN+30|IGN|DFL|N| ++|信号|init|sysmaster|systemd|sysmaster与systemd对外表现是否有差异| ++|-|-|-|-|-| ++|1) SIGHUP|捕获不处理|daemon-reexec|daemon-reload|N| ++|2) SIGINT|捕获不处理|start ctrl-alt-del.target|start ctrl-alt-del.target|N| ++|3) SIGQUIT|捕获不处理|故障恢复|crash handler|Y| ++|4) SIGILL|捕获不处理|故障恢复|crash handler|Y| ++|5) SIGTRAP|捕获不处理|IGN|DFL|N| ++|6) SIGABRT|捕获不处理|故障恢复|crash handler|Y| ++|7) SIGBUS|捕获不处理|故障恢复|crash handler|Y| ++|8) SIGFPE|捕获不处理|故障恢复|crash handler|Y| ++|9) SIGKILL|内核主动屏蔽该信号|DFL|内核主动屏蔽该信号|Y| ++|10) SIGUSR1|捕获不处理|IGN|重连dbus|Y| ++|11) SIGSEGV|捕获不处理|故障恢复|crash handler|Y| ++|12) SIGUSR2|捕获不处理|IGN|输出所有单元的配置信息|Y| ++|13) SIGPIPE|捕获不处理|IGN|IGN|N| ++|14) SIGALRM|捕获不处理|IGN|DFL|N| ++|15) SIGTERM|捕获不处理|daemon-reexec|daemon-reexec|N| ++|16) SIGSTKFLT|捕获不处理|IGN|DFL|N| ++|17) SIGCHLD|僵尸子进程回收|僵尸子进程回收|僵尸子进程回收|N| ++|18) SIGCONT|捕获不处理|IGN|DFL|N| ++|19) SIGSTOP|内核主动屏蔽该信号|DFL|内核主动屏蔽该信号|Y| ++|20) SIGTSTP|捕获不处理|IGN|DFL|N| ++|21) SIGTTIN|捕获不处理|IGN|DFL|N| ++|22) SIGTTOU|捕获不处理|IGN|DFL|N| ++|23) SIGURG|捕获不处理|IGN|DFL|N| ++|24) SIGXCPU|捕获不处理|IGN|DFL|N| ++|25) SIGXFSZ|捕获不处理|IGN|DFL|N| ++|26) SIGVTALRM|捕获不处理|IGN|DFL|N| ++|27) SIGPROF|捕获不处理|IGN|DFL|N| ++|28) SIGWINCH|捕获不处理|IGN|start kbrequest.target|Y| ++|29) SIGIO|捕获不处理|IGN|DFL|N| ++|30) SIGPWR|捕获不处理|IGN|start sigpwr.target|Y| ++|31) SIGSYS|捕获不处理|故障恢复|DFL|N| ++|34) SIGRTMIN|捕获不处理|IGN|start default.target|Y| ++|35) SIGRTMIN+1|捕获不处理|IGN|isolate rescue.target|Y| ++|36) SIGRTMIN+2|捕获不处理|IGN|isolate emergency.target|Y| ++|37) SIGRTMIN+3|捕获不处理|IGN|start halt.target|Y| ++|38) SIGRTMIN+4|捕获不处理|IGN|start poweroff.target|Y| ++|39) SIGRTMIN+5|捕获不处理|IGN|start reboot.target|Y| ++|40) SIGRTMIN+6|捕获不处理|IGN|start kexec.target|Y| ++|41) SIGRTMIN+7|捕获不处理|daemon-reexec|DFL|Y| ++|42) SIGRTMIN+8|unrecover state|IGN|DFL|N| ++|43) SIGRTMIN+9|重执行sysmaster|IGN|DFL|N| ++|44) SIGRTMIN+10|switch root|IGN|DFL|Y| ++|45) SIGRTMIN+11|捕获不处理|IGN|DFL|N| ++|46) SIGRTMIN+12|捕获不处理|IGN|DFL|N| ++|47) SIGRTMIN+13|捕获不处理|IGN|Immediate halt|Y| ++|48) SIGRTMIN+14|捕获不处理|IGN|Immediate poweroff|Y| ++|49) SIGRTMIN+15|捕获不处理|IGN|Immediate reboot|Y| ++|50) SIGRTMAX-14 SIGRTMIN+16|捕获不处理|IGN|Immediate kexec|Y| ++|51) SIGRTMAX-13 SIGRTMIN+17|捕获不处理|IGN|DFL|N| ++|52) SIGRTMAX-12 SIGRTMIN+18|捕获不处理|IGN|DFL|N| ++|53) SIGRTMAX-11 SIGRTMIN+19|捕获不处理|IGN|DFL|N| ++|54) SIGRTMAX-10 SIGRTMIN+20|捕获不处理|IGN|enable status messages|Y| ++|55) SIGRTMAX-9 SIGRTMIN+21|捕获不处理|IGN|disable status messages|Y| ++|56) SIGRTMAX-8 SIGRTMIN+22|捕获不处理|IGN|日志级别设为debug|Y| ++|57) SIGRTMAX-7 SIGRTMIN+23|捕获不处理|IGN|日志级别设为info|Y| ++|58) SIGRTMAX-6 SIGRTMIN+24|捕获不处理|IGN|Immediate exit (仅限于用户模式)|Y| ++|59) SIGRTMAX-5 SIGRTMIN+25|捕获不处理|IGN|reexecute manager|Y| ++|60) SIGRTMAX-4 SIGRTMIN+26|捕获不处理|IGN|日志输出设为journal-or-kmsg|Y| ++|61) SIGRTMAX-3 SIGRTMIN+27|捕获不处理|IGN|日志输出设为console|Y| ++|62) SIGRTMAX-2 SIGRTMIN+28|捕获不处理|IGN|日志输出设为kmsg|Y| ++|63) SIGRTMAX-1 SIGRTMIN+29|捕获不处理|IGN|日志输出设为syslog-or-kmsg|Y| ++|64) SIGRTMAX SIGRTMIN+30|捕获不处理|IGN|DFL|N| + + 表格的具体说明: + +-1. IGN、DFL分别表示信号处理函数:SIG_IGN(忽略)、SIG_DFL(缺省的信号处理函数)。如果init进程没有注册信号处理函数,即使用SIG_DFL,内核会直接屏蔽掉该信号。因此在对外表现上,SIG_IGN和SIG_DFL是一致的。 +-2. SIGKILL、SIGSTOP信号是内核为init进程无条件屏蔽的,且不允许通过sigaction修改其信号处理函数,sysmaster当前没有方案消除该差异。 +-3. SIGUSR1、SIGUSR2、SIGWINCH、SIGPWR、SIGRTMIN+{0-6、13-16、20-29}的差异后续能够消除。 +-4. SIGRTMIN+7在sysMaster中为init进程发给sysmaster进程,主动触发热重启。SIGRTMIN+10在sysMaster中为init进程发给sysmaster,执行switch root。 ++1. IGN、DFL分别表示信号处理函数:SIG_IGN(忽略)、SIG_DFL(缺省的信号处理函数)。如果1号进程没有注册信号处理函数,即使用SIG_DFL,内核会直接屏蔽掉该信号。因此1号进程在对外表现上,SIG_IGN和SIG_DFL是一致的。 ++2. SIGKILL、SIGSTOP信号是内核为1号进程无条件屏蔽的,且不允许通过sigaction修改其信号处理函数,sysmaster当前没有方案消除该差异。 ++3. init进程: ++ - SIGCHLD: 回收所有僵尸子进程。 ++ - SIGRTMIN+8: 进入不可恢复状态。 ++ - SIGRTMIN+9: 下发重执行sysmaster信号。 ++ - SIGRTMIN+10: 只处理sysmaster进程的SIGRTMIN+10信号,其余进程忽略。重执行init自身,重执行sysmaster。 ++ - init捕获所有可捕获信号,除上述信号做了处理以外,其余都忽略不处理。 ++4. sysmaster进程: ++ - SIGCHLD: 回收所有僵尸子进程。 ++ - SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT,SIGSYS: 故障恢复,重执行sysmaster。 ++ - SIGRTMIN+7: 只处理init进程下发的SIGRTMIN+7信号,sysmaster接收到该信号后主动触发热重启。 ++ - SIGUSR1、SIGUSR2、SIGWINCH、SIGPWR、SIGRTMIN+{0-6、13-16、20-29}sysmaster与systemd的差异后续能够消除。 +-- +2.33.0 + diff --git a/backport-fix-.socket-stop-faild-when-.socket-is-in-listening.patch b/backport-fix-.socket-stop-faild-when-.socket-is-in-listening.patch new file mode 100644 index 0000000..12f7daf --- /dev/null +++ b/backport-fix-.socket-stop-faild-when-.socket-is-in-listening.patch @@ -0,0 +1,136 @@ +From 6026b784dfff831796ab04ddbf3913684360d845 Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Fri, 2 Jun 2023 05:00:56 +0800 +Subject: [PATCH] fix: .socket stop faild when .socket is in listening or + running state + +--- + coms/socket/src/config.rs | 20 +++++++------------- + coms/socket/src/load.rs | 2 +- + coms/socket/src/unit.rs | 10 ++++++++++ + core/bin/unit/entry/uentry.rs | 12 +++++++++++- + 4 files changed, 29 insertions(+), 15 deletions(-) + +diff --git a/coms/socket/src/config.rs b/coms/socket/src/config.rs +index 88c9e1a..a05fba5 100644 +--- a/coms/socket/src/config.rs ++++ b/coms/socket/src/config.rs +@@ -98,7 +98,7 @@ impl ReStation for SocketConfig { + + // UnitRef + if let Some(svc) = service { +- self.set_unit_ref(svc).unwrap(); ++ self.set_unit_ref(svc); + } + + // SocketPortConf +@@ -168,15 +168,9 @@ impl SocketConfig { + self.data.borrow().get_exec_cmds(cmd_type) + } + +- pub(super) fn set_unit_ref(&self, service: String) -> Result<()> { +- if !self.comm.um().load_unit_success(&service) { +- return Err(format!("failed to load unit {service}").into()); +- } +- ++ pub(super) fn set_unit_ref(&self, service: String) { + self.set_ref(service); + self.db_update(); +- +- Ok(()) + } + + pub(super) fn unit_ref_target(&self) -> Option { +@@ -190,12 +184,11 @@ impl SocketConfig { + fn parse_service(&self) -> Result<()> { + if let Some(service) = self.config_data().borrow().Socket.Service.clone() { + if !service.ends_with(".service") { +- return Err("socket service must be end with .service" +- .to_string() +- .into()); ++ log::warn!("socket service must be end with .service, ignoring:{service}"); ++ return Ok(()); + } + +- self.set_unit_ref(service)?; ++ self.set_unit_ref(service); + } + + Ok(()) +@@ -245,7 +238,8 @@ impl SocketConfig { + + let socket_addr = match parse_func(v, socket_type) { + Err(_) => { +- return Err(format!("Invalid socket configuration: {v}").into()); ++ log::warn!("Invalid socket configuration: {v}"); ++ return Ok(()); + } + Ok(v) => v, + }; +diff --git a/coms/socket/src/load.rs b/coms/socket/src/load.rs +index 39e8e66..d495389 100644 +--- a/coms/socket/src/load.rs ++++ b/coms/socket/src/load.rs +@@ -70,7 +70,7 @@ impl SocketLoad { + let u_name = unit_name.unwrap(); + let stem_name = Path::new(&u_name).file_stem().unwrap().to_str().unwrap(); + let relate_name = format!("{stem_name}.{suffix}"); +- self.config.set_unit_ref(relate_name)?; ++ self.config.set_unit_ref(relate_name); + Ok(()) + } + +diff --git a/coms/socket/src/unit.rs b/coms/socket/src/unit.rs +index 9f0f6ce..d593350 100644 +--- a/coms/socket/src/unit.rs ++++ b/coms/socket/src/unit.rs +@@ -204,6 +204,13 @@ impl SocketUnit { + } + + fn verify(&self) -> Result<()> { ++ if self.config.ports().is_empty() { ++ log::error!("Unit has no Listen setting (ListenStream=, ListenDatagram=, ListenFIFO=, ...). Refusing."); ++ return Err(Error::Nix { ++ source: nix::Error::ENOEXEC, ++ }); ++ } ++ + let config = self.config.config_data(); + if config.borrow().Socket.Symlinks.is_some() + && !config.borrow().Socket.Symlinks.as_ref().unwrap().is_empty() +@@ -212,6 +219,9 @@ impl SocketUnit { + /* Set to None, so we won't create symlinks by mistake. */ + config.borrow_mut().Socket.Symlinks = None; + log::error!("Symlinks in [Socket] is configured, but there are none or more than one listen files."); ++ return Err(Error::Nix { ++ source: nix::Error::ENOEXEC, ++ }); + } + Ok(()) + } +diff --git a/core/bin/unit/entry/uentry.rs b/core/bin/unit/entry/uentry.rs +index f6b4db4..0c6e044 100644 +--- a/core/bin/unit/entry/uentry.rs ++++ b/core/bin/unit/entry/uentry.rs +@@ -578,7 +578,17 @@ impl Unit { + Ok(_) => { + let paths = self.load.get_unit_id_fragment_pathbuf(); + log::debug!("Begin exec sub class load"); +- self.sub.load(paths)?; ++ ++ if let Err(err) = self.sub.load(paths) { ++ if let Error::Nix { source } = err { ++ if source == nix::Error::ENOEXEC { ++ self.load.set_load_state(UnitLoadState::BadSetting); ++ return Err(err); ++ } ++ } ++ self.load.set_load_state(UnitLoadState::Error); ++ return Err(err); ++ } + + self.load.set_load_state(UnitLoadState::Loaded); + Ok(()) +-- +2.33.0 + diff --git a/backport-fix-Clear-buffer-correctly-during-reload-or-reexec.patch b/backport-fix-Clear-buffer-correctly-during-reload-or-reexec.patch new file mode 100644 index 0000000..fb0aec1 --- /dev/null +++ b/backport-fix-Clear-buffer-correctly-during-reload-or-reexec.patch @@ -0,0 +1,120 @@ +From 754126dc701fe1d945b60c634b0c84d3c976b23b Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Wed, 7 Jun 2023 16:33:26 +0800 +Subject: [PATCH] fix: Clear buffer correctly during reload or reexec + +--- + core/lib/rel/api.rs | 22 +++++++++++----------- + core/lib/rel/base.rs | 10 +++++++--- + core/lib/rel/history.rs | 1 - + 3 files changed, 18 insertions(+), 15 deletions(-) + +diff --git a/core/lib/rel/api.rs b/core/lib/rel/api.rs +index 59e4ac9..ad14796 100644 +--- a/core/lib/rel/api.rs ++++ b/core/lib/rel/api.rs +@@ -187,11 +187,7 @@ impl Reliability { + self.db_compensate(); + self.db_map(reload); + if reload { +- // If daemon-reload or daemon-reexec, we need to update all changes, clear db, and submit all changes to db. +- self.db_insert(); +- self.history.flush(); +- // Due to changes in db, we need to reload the data from db to cache. +- self.history.import(); ++ self.db_flush(); + } + self.make_consistent(); + +@@ -309,12 +305,10 @@ impl Reliability { + } + + fn input_rebuild(&self) { +- // ignore history's input + self.history.switch_set(true); + + self.station.input_rebuild(); + +- // restore history's ignore + self.history.switch_set(false); + } + +@@ -331,22 +325,28 @@ impl Reliability { + /// map data from database + /// reload determine whether the configuration needs to be reloaded based on the situation. + fn db_map(&self, reload: bool) { +- // control use buf + self.history.switch_set(true); + + self.station.db_map(reload); + +- // control use cache + self.history.switch_set(false); + } + +- /// map-data insert to buf, switch is true indicating either daemon-reload or daemon-reexec +- fn db_insert(&self) { ++ /// flush all data from buffer to db ++ fn db_flush(&self) { ++ // clear data before using buffer + self.history.switch_set(true); + ++ // update all changes to buffer + self.station.db_insert(); + ++ // clear db, submit data from all buffers to db, clear buffer ++ self.history.flush(); ++ + self.history.switch_set(false); ++ ++ // Due to changes in db, reload the data from db to cache. ++ self.history.import(); + } + + fn make_consistent(&self) { +diff --git a/core/lib/rel/base.rs b/core/lib/rel/base.rs +index 29200df..7137e13 100644 +--- a/core/lib/rel/base.rs ++++ b/core/lib/rel/base.rs +@@ -27,8 +27,7 @@ use std::{env, fs}; + /// the reliability database + /// K & V that can be deserialized without borrowing any data from the deserializer. + pub struct ReDb { +- // control +- switch: RefCell, ++ switch: RefCell, // if switch is true use buffer, if switch is false use cache + + // data + /* database */ +@@ -97,10 +96,15 @@ where + self.cache.borrow_mut().clear(); + self.add.borrow_mut().clear(); + self.del.borrow_mut().clear(); ++ // Do not clear the buffer because its data is transient. + } + +- /// set the buffer-switch flag of data ++ /// switch between cache and buffer + pub fn switch_buffer(&self, switch: bool) { ++ if switch { ++ // Before using the buffer, data needs to be cleared. ++ self.buffer.borrow_mut().clear(); ++ } + *self.switch.borrow_mut() = switch; + } + +diff --git a/core/lib/rel/history.rs b/core/lib/rel/history.rs +index f9e2146..199e10e 100644 +--- a/core/lib/rel/history.rs ++++ b/core/lib/rel/history.rs +@@ -158,7 +158,6 @@ impl ReliHistory { + } + + pub fn switch_set(&self, switch: bool) { +- // set switch + *self.switch.borrow_mut() = switch; + for (_, db) in self.dbs.borrow().iter() { + db.switch_set(switch); +-- +2.33.0 + diff --git a/backport-fix-commands-connection-timeout-causing-event-blocki.patch b/backport-fix-commands-connection-timeout-causing-event-blocki.patch new file mode 100644 index 0000000..62499fa --- /dev/null +++ b/backport-fix-commands-connection-timeout-causing-event-blocki.patch @@ -0,0 +1,33 @@ +From 7dea4930d6118ffb650a34aafc7a37dcdb198cf9 Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Thu, 8 Jun 2023 10:36:38 +0800 +Subject: [PATCH] fix: commands connection timeout causing event blocking + +--- + core/bin/manager/commands.rs | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/core/bin/manager/commands.rs b/core/bin/manager/commands.rs +index 94877a2..0727295 100644 +--- a/core/bin/manager/commands.rs ++++ b/core/bin/manager/commands.rs +@@ -41,7 +41,7 @@ impl Commands { + let socket_fd = socket::socket( + socket::AddressFamily::Unix, + socket::SockType::Stream, +- socket::SockFlag::empty(), ++ socket::SockFlag::SOCK_CLOEXEC | socket::SockFlag::SOCK_NONBLOCK, + None, + ) + .unwrap(); +@@ -112,6 +112,6 @@ where + } + + fn priority(&self) -> i8 { +- 10i8 ++ 0i8 + } + } +-- +2.33.0 + diff --git a/backport-fix-inconsistent-data-in-app-memory-and-db-after-dae.patch b/backport-fix-inconsistent-data-in-app-memory-and-db-after-dae.patch new file mode 100644 index 0000000..2018e02 --- /dev/null +++ b/backport-fix-inconsistent-data-in-app-memory-and-db-after-dae.patch @@ -0,0 +1,814 @@ +From a80ad5b0aef20f9f5d6e5f06450311c39075a16e Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Thu, 25 May 2023 08:22:31 +0800 +Subject: [PATCH] fix: inconsistent data in app memory and db after + daemon-reload or daemon-reexec + +--- + coms/mount/src/rentry.rs | 16 ++++++-- + coms/service/src/mng.rs | 1 + + coms/service/src/rentry.rs | 16 ++++++-- + coms/socket/src/mng.rs | 11 ++--- + coms/socket/src/rentry.rs | 24 ++++++++--- + coms/target/src/rentry.rs | 8 +++- + core/bin/job/entry.rs | 4 +- + core/bin/job/manager.rs | 9 +++++ + core/bin/job/table.rs | 30 ++++++++++++++ + core/bin/unit/datastore/child.rs | 9 +++++ + core/bin/unit/datastore/deps.rs | 5 +++ + core/bin/unit/datastore/mod.rs | 8 ++++ + core/bin/unit/entry/uentry.rs | 11 +++++ + core/bin/unit/entry/unitx.rs | 4 ++ + core/bin/unit/manager.rs | 28 +++++++++++++ + core/bin/unit/runtime.rs | 11 +++++ + core/lib/rel/api.rs | 30 ++++++++++---- + core/lib/rel/base.rs | 69 +++++++++++++++++++++++--------- + core/lib/rel/history.rs | 23 +++++++++-- + core/lib/rel/station.rs | 12 ++++++ + 20 files changed, 277 insertions(+), 52 deletions(-) + +diff --git a/coms/mount/src/rentry.rs b/coms/mount/src/rentry.rs +index 814ad21..45c0ce0 100644 +--- a/coms/mount/src/rentry.rs ++++ b/coms/mount/src/rentry.rs +@@ -103,12 +103,16 @@ impl ReDbTable for MountReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } + +@@ -121,11 +125,15 @@ impl ReDbTable for MountReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } +diff --git a/coms/service/src/mng.rs b/coms/service/src/mng.rs +index d73fa34..0f5db5f 100644 +--- a/coms/service/src/mng.rs ++++ b/coms/service/src/mng.rs +@@ -130,6 +130,7 @@ impl ReStation for ServiceMng { + } + + fn entry_clear(&self) { ++ // pid_file is a transient file that can be directly closed + self.unwatch_pid_file(); + + self.stop_watchdog(); +diff --git a/coms/service/src/rentry.rs b/coms/service/src/rentry.rs +index df8a255..175828e 100644 +--- a/coms/service/src/rentry.rs ++++ b/coms/service/src/rentry.rs +@@ -598,12 +598,16 @@ impl ReDbTable for ServiceReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } + +@@ -616,11 +620,15 @@ impl ReDbTable for ServiceReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } +diff --git a/coms/socket/src/mng.rs b/coms/socket/src/mng.rs +index e079acd..e7a6a01 100644 +--- a/coms/socket/src/mng.rs ++++ b/coms/socket/src/mng.rs +@@ -109,13 +109,14 @@ impl ReStation for SocketMng { + + // reload: entry-only + fn entry_coldplug(&self) { +- if self.state() != SocketState::Listening { ++ if self.state() == SocketState::Listening { + self.watch_fds(); + } + } + + fn entry_clear(&self) { +- self.close_fds(); ++ // port fd is a long-term monitoring file and cannot be closed ++ self.unwatch_fds(); + } + } + +@@ -690,11 +691,11 @@ impl SocketMng { + } + + fn map_ports_fd(&self, rports: Vec<(PortType, String, RawFd)>) { +- assert_eq!(rports.len(), self.ports().len()); +- + for (p_type, listen, fd) in rports.iter() { + match self.ports_find(*p_type, listen) { +- Some(port) => port.set_fd(self.comm.reli().fd_take(*fd)), ++ Some(port) => { ++ port.set_fd(self.comm.reli().fd_take(*fd)); ++ } + None => log::debug!("Not find {:?}:{:?}", *p_type, listen), + } + } +diff --git a/coms/socket/src/rentry.rs b/coms/socket/src/rentry.rs +index 1794e69..2409c84 100644 +--- a/coms/socket/src/rentry.rs ++++ b/coms/socket/src/rentry.rs +@@ -330,12 +330,16 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } + +@@ -348,12 +352,16 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } + +@@ -366,11 +374,15 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } +diff --git a/coms/target/src/rentry.rs b/coms/target/src/rentry.rs +index 8f0ce69..9d3f90c 100644 +--- a/coms/target/src/rentry.rs ++++ b/coms/target/src/rentry.rs +@@ -77,11 +77,15 @@ impl ReDbTable for TargetReDb { + self.0.cache_2_db(db_wtxn); + } + ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.rebuf_2_db(db_wtxn); ++ } ++ + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { + self.0.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.0.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.0.set_switch(switch); + } + } +diff --git a/core/bin/job/entry.rs b/core/bin/job/entry.rs +index 55a0015..54af665 100755 +--- a/core/bin/job/entry.rs ++++ b/core/bin/job/entry.rs +@@ -472,7 +472,7 @@ impl Job { + last_rkind != rkind + } + +- fn rentry_trigger_insert(&self) { ++ pub(super) fn rentry_trigger_insert(&self) { + self.rentry + .trigger_insert(self.unit.id(), self.kind, &self.attr.borrow()); + } +@@ -485,7 +485,7 @@ impl Job { + self.rentry.trigger_get(self.unit.id()) + } + +- fn rentry_suspends_insert(&self) { ++ pub(super) fn rentry_suspends_insert(&self) { + self.rentry + .suspends_insert(self.unit.id(), self.kind, &self.attr.borrow()); + } +diff --git a/core/bin/job/manager.rs b/core/bin/job/manager.rs +index f5a247d..e8eee8b 100755 +--- a/core/bin/job/manager.rs ++++ b/core/bin/job/manager.rs +@@ -108,6 +108,10 @@ impl ReStation for JobManager { + self.data.db_map(); + } + ++ fn db_insert(&self) { ++ self.data.db_insert(); ++ } ++ + // reload: special entry_coldplug + // repeating protection + fn entry_clear(&self) { +@@ -386,6 +390,11 @@ impl JobManagerData { + self.stat.clear_cnt(); // no history + } + ++ pub(self) fn db_insert(&self) { ++ self.jobs.rentry_insert_trigger(); ++ self.jobs.rentry_insert_suspend(); ++ } ++ + pub(self) fn coldplug_unit(&self, unit: &UnitX) { + // trigger + self.jobs.coldplug_trigger(unit); +diff --git a/core/bin/job/table.rs b/core/bin/job/table.rs +index 91ac45b..7b77b1a 100644 +--- a/core/bin/job/table.rs ++++ b/core/bin/job/table.rs +@@ -60,6 +60,12 @@ impl JobTable { + job + } + ++ pub(super) fn rentry_insert_suspend(&self) { ++ for job in self.t_unit.borrow().get_all_suspends() { ++ job.rentry_suspends_insert(); ++ } ++ } ++ + pub(super) fn rentry_map_trigger(&self, ja: &JobAlloc, config: &JobConf) -> Rc { + let job = ja.alloc(config); + job.rentry_map_trigger(); +@@ -67,6 +73,12 @@ impl JobTable { + job + } + ++ pub(super) fn rentry_insert_trigger(&self) { ++ for job in self.t_unit.borrow().get_all_triggers() { ++ job.rentry_trigger_insert(); ++ } ++ } ++ + pub(super) fn coldplug_suspend(&self, unit: &UnitX) { + for job in self.t_unit.borrow().get_suspends(unit).iter() { + job.coldplug_suspend(); +@@ -712,6 +724,24 @@ impl JobUnitTable { + jobs + } + ++ pub(self) fn get_all_suspends(&self) -> Vec> { ++ let mut jobs = Vec::new(); ++ for (_, uv) in self.t_data.iter() { ++ jobs.append(&mut uv.get_suspends()); ++ } ++ jobs ++ } ++ ++ pub(self) fn get_all_triggers(&self) -> Vec> { ++ let mut jobs = Vec::new(); ++ for (_, uv) in self.t_data.iter() { ++ if let Some(job) = uv.get_trigger() { ++ jobs.push(job); ++ } ++ } ++ jobs ++ } ++ + pub(self) fn get_trigger_info(&self, unit: &UnitX) -> Option<(Rc, bool)> { + if let Some(uv) = self.t_data.get(unit) { + uv.get_trigger() +diff --git a/core/bin/unit/datastore/child.rs b/core/bin/unit/datastore/child.rs +index af63792..60c73c5 100644 +--- a/core/bin/unit/datastore/child.rs ++++ b/core/bin/unit/datastore/child.rs +@@ -37,6 +37,10 @@ impl ReStation for UnitChild { + self.data.db_map(&self.units); + } + ++ fn db_insert(&self) { ++ self.data.db_insert(); ++ } ++ + // reload + fn entry_clear(&self) { + self.data.entry_clear(); +@@ -132,6 +136,11 @@ impl UnitChildData { + } + } + ++ pub(self) fn db_insert(&self) { ++ // [key:pid, data:unit[s]] and [key:unit, data:pid[s]](RELI_DB_HUNIT_CHILD) are equivalent ++ // So here [key:unit, data:pids] is reused and [key:pid, data:unit[s]] is not stored in the database. ++ } ++ + pub(self) fn add_watch_pid(&self, unit: Rc, pid: Pid) { + let mut watch_pids = self.watch_pids.borrow_mut(); + watch_pids.insert(pid, unit); +diff --git a/core/bin/unit/datastore/deps.rs b/core/bin/unit/datastore/deps.rs +index cd38d45..80836ef 100644 +--- a/core/bin/unit/datastore/deps.rs ++++ b/core/bin/unit/datastore/deps.rs +@@ -39,6 +39,11 @@ impl ReStation for UnitDep { + self.sub.data.borrow_mut().db_map(); + } + ++ fn db_insert(&self) { ++ // If the data changes under db_map() when reload is true, it needs to be inserted. ++ // db_map currently does nothing to do. ++ } ++ + // reload + fn entry_clear(&self) { + self.sub.data.borrow_mut().clear(); +diff --git a/core/bin/unit/datastore/mod.rs b/core/bin/unit/datastore/mod.rs +index 1dc09d7..2a5f31b 100644 +--- a/core/bin/unit/datastore/mod.rs ++++ b/core/bin/unit/datastore/mod.rs +@@ -70,6 +70,14 @@ impl UnitDb { + self.child.db_map(reload); + } + ++ pub(super) fn db_insert_excl_units(&self) { ++ // dep ++ self.dep.db_insert(); ++ ++ // child ++ self.child.db_insert(); ++ } ++ + pub fn units_insert(&self, name: String, unit: Rc) -> Option> { + self.units.insert(name, unit) + } +diff --git a/core/bin/unit/entry/uentry.rs b/core/bin/unit/entry/uentry.rs +index fd05746..a22ad10 100644 +--- a/core/bin/unit/entry/uentry.rs ++++ b/core/bin/unit/entry/uentry.rs +@@ -94,6 +94,17 @@ impl ReStation for Unit { + self.sub.db_map(reload); + } + ++ // data insert ++ fn db_insert(&self) { ++ self.base.db_insert(); ++ self.config.db_insert(); ++ self.cgroup.db_insert(); ++ self.load.db_insert(); ++ self.child.db_insert(); ++ ++ self.sub.db_insert(); ++ } ++ + // reload: entry-only + fn entry_coldplug(&self) { + // rebuild external connections, like: timer, ... +diff --git a/core/bin/unit/entry/unitx.rs b/core/bin/unit/entry/unitx.rs +index 9ffd549..d9878f4 100644 +--- a/core/bin/unit/entry/unitx.rs ++++ b/core/bin/unit/entry/unitx.rs +@@ -38,6 +38,10 @@ impl ReStation for UnitX { + self.0.db_map(reload); + } + ++ fn db_insert(&self) { ++ self.0.db_insert(); ++ } ++ + // reload: entry-only + fn entry_coldplug(&self) { + self.0.entry_coldplug(); +diff --git a/core/bin/unit/manager.rs b/core/bin/unit/manager.rs +index bee3093..6e83240 100644 +--- a/core/bin/unit/manager.rs ++++ b/core/bin/unit/manager.rs +@@ -1198,6 +1198,28 @@ impl ReStation for UnitManager { + self.sms.db_map(reload); + } + ++ // data ++ fn db_insert(&self) { ++ for unit in self.db.units_get_all(None).iter() { ++ unit.db_insert(); ++ } ++ ++ /* others: unit-dep and unit-child */ ++ self.db.db_insert_excl_units(); ++ ++ // rt ++ self.rt.db_insert(); ++ ++ // job ++ self.jm.db_insert(); ++ ++ // notify ++ self.notify.db_insert(); ++ ++ // sub-manager ++ self.sms.db_insert(); ++ } ++ + // reload + fn register_ex(&self) { + // notify +@@ -1290,6 +1312,12 @@ mod unit_submanager { + } + } + ++ pub(super) fn db_insert(&self) { ++ for (_, sub) in self.db.borrow().iter() { ++ sub.db_insert(); ++ } ++ } ++ + pub(super) fn db_compensate_last( + &self, + lframe: (u32, Option, Option), +diff --git a/core/bin/unit/runtime.rs b/core/bin/unit/runtime.rs +index 89e62c5..215a5a7 100644 +--- a/core/bin/unit/runtime.rs ++++ b/core/bin/unit/runtime.rs +@@ -47,6 +47,10 @@ impl ReStation for UnitRT { + self.data.db_map(reload); + } + ++ fn db_insert(&self) { ++ self.data.db_insert(); ++ } ++ + // reload + // repeating protection + fn entry_clear(&self) { +@@ -239,6 +243,13 @@ impl UnitRTData { + } + } + ++ pub(self) fn db_insert(&self) { ++ // If the data changes under db_map() when reload is true, it needs to be inserted. ++ // db_map currently does nothing to do. ++ // QUEUE_LOAD is nothing to do. ++ // QUEUE_TARGET_DEPS is nothing to do. ++ } ++ + pub(self) fn dispatch_load_queue(&self) { + log::trace!("dispatch load queue"); + +diff --git a/core/lib/rel/api.rs b/core/lib/rel/api.rs +index cabf24e..60a3543 100644 +--- a/core/lib/rel/api.rs ++++ b/core/lib/rel/api.rs +@@ -151,6 +151,13 @@ impl Reliability { + self.input_rebuild(); + self.db_compensate(); + self.db_map(reload); ++ if reload { ++ // If daemon-reload or daemon-reexec, we need to update all changes, clear db, and submit all changes to db. ++ self.db_insert(); ++ self.history.reflush(); ++ // Due to changes in db, we need to reload the data from db to cache. ++ self.history.import(); ++ } + self.make_consistent(); + + // restore last's ignore +@@ -263,12 +270,12 @@ impl Reliability { + + fn input_rebuild(&self) { + // ignore history's input +- self.history.ignore_set(true); ++ self.history.switch_set(true); + + self.station.input_rebuild(); + + // restore history's ignore +- self.history.ignore_set(false); ++ self.history.switch_set(false); + } + + fn db_compensate(&self) { +@@ -282,15 +289,24 @@ impl Reliability { + } + + /// map data from database +- /// If reload is true, determine whether the configuration needs to be reloaded based on the situation. ++ /// reload determine whether the configuration needs to be reloaded based on the situation. + fn db_map(&self, reload: bool) { +- // ignore history's input +- self.history.ignore_set(true); ++ // control use buf ++ self.history.switch_set(true); + + self.station.db_map(reload); + +- // restore history's ignore +- self.history.ignore_set(false); ++ // control use cache ++ self.history.switch_set(false); ++ } ++ ++ /// map-data insert to buf, switch is true indicating either daemon-reload or daemon-reexec ++ fn db_insert(&self) { ++ self.history.switch_set(true); ++ ++ self.station.db_insert(); ++ ++ self.history.switch_set(false); + } + + fn make_consistent(&self) { +diff --git a/core/lib/rel/base.rs b/core/lib/rel/base.rs +index 782d54c..274b504 100644 +--- a/core/lib/rel/base.rs ++++ b/core/lib/rel/base.rs +@@ -28,7 +28,7 @@ use std::{env, fs}; + /// K & V that can be deserialized without borrowing any data from the deserializer. + pub struct ReDb { + // control +- ignore: RefCell, ++ switch: RefCell, + + // data + db: Database, SerdeBincode>, +@@ -36,7 +36,8 @@ pub struct ReDb { + add: RefCell>, + del: RefCell>, + name: String, +- //_phantom: PhantomData<&'a K>, ++ buf: RefCell>, // daemon-reload or daemon-reexec will temporarily store the data here first, and finally refreshes it to db. ++ //_phantom: PhantomData<&'a K>, + } + + impl ReDbTable for ReDb +@@ -52,12 +53,17 @@ where + self.cache_2_db(db_wtxn); + } + ++ /// daemon-reload or daemon-reexec export all data to database ++ fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.rebuf_2_db(db_wtxn); ++ } ++ + fn import(&self, db_rtxn: &ReDbRoTxn) { + self.db_2_cache(db_rtxn); + } + +- fn ignore_set(&self, ignore: bool) { +- self.set_ignore(ignore); ++ fn switch_set(&self, switch: bool) { ++ self.set_switch(switch); + } + } + +@@ -70,12 +76,13 @@ where + pub fn new(relir: &Reliability, db_name: &str) -> ReDb { + let db = relir.create_database(Some(db_name)).unwrap(); + ReDb { +- ignore: RefCell::new(false), ++ switch: RefCell::new(false), + db, + cache: RefCell::new(HashMap::new()), + add: RefCell::new(HashMap::new()), + del: RefCell::new(HashSet::new()), + name: String::from(db_name), ++ buf: RefCell::new(HashMap::new()), + //_phantom: PhantomData, + } + } +@@ -89,19 +96,26 @@ where + } + + /// set the ignore flag of data +- pub fn set_ignore(&self, ignore: bool) { +- *self.ignore.borrow_mut() = ignore; ++ pub fn set_switch(&self, switch: bool) { ++ *self.switch.borrow_mut() = switch; + } + + /// insert a entry + pub fn insert(&self, k: K, v: V) { +- if self.ignore() { ++ let switch = self.switch(); ++ log::debug!( ++ "ReDb[{}] switch:{:?} insert, key:{:?}, value:{:?}.", ++ &self.name, ++ switch, ++ &k, ++ &v ++ ); ++ ++ if switch { ++ self.buf.borrow_mut().insert(k, v); + return; + } + +- let n = &self.name; +- log::debug!("ReDb[{}] insert, key: {:?}, value: {:?}.", n, &k, &v); +- + // remove "del" + insert "add" + self.del.borrow_mut().remove(&k); + self.add.borrow_mut().insert(k.clone(), v.clone()); +@@ -112,12 +126,15 @@ where + + /// remove a entry + pub fn remove(&self, k: &K) { +- if self.ignore() { ++ let n = &self.name; ++ let switch = self.switch(); ++ log::debug!("ReDb[{}] switch:{:?}, remove, key:{:?}.", n, switch, &k); ++ ++ if switch { ++ self.buf.borrow_mut().remove(k); + return; + } + +- log::debug!("ReDb[{}] remove, key: {:?}.", &self.name, &k); +- + // remove "add" + insert "del" + self.add.borrow_mut().remove(k); + self.del.borrow_mut().insert(k.clone()); +@@ -136,6 +153,10 @@ where + + /// get the existence of the key + pub fn contains_key(&self, k: &K) -> bool { ++ if self.switch() { ++ return self.buf.borrow().contains_key(k); ++ } ++ + self.cache.borrow().contains_key(k) + } + +@@ -178,6 +199,16 @@ where + self.del.borrow_mut().clear(); + } + ++ /// export all data from cache to database ++ pub fn rebuf_2_db(&self, wtxn: &mut ReDbRwTxn) { ++ // "buf" -> db.put + clear "buf" ++ for (k, v) in self.buf.borrow().iter() { ++ self.db.put(&mut wtxn.0, k, v).expect("history.put"); ++ } ++ ++ self.buf.borrow_mut().clear(); ++ } ++ + /// emport all data from database to cache + pub fn db_2_cache(&self, rtxn: &ReDbRoTxn) + where +@@ -197,8 +228,8 @@ where + } + } + +- fn ignore(&self) -> bool { +- *self.ignore.borrow() ++ fn switch(&self) -> bool { ++ *self.switch.borrow() + } + } + +@@ -228,10 +259,12 @@ pub trait ReDbTable { + fn clear(&self, wtxn: &mut ReDbRwTxn); + /// export all data to database + fn export(&self, wtxn: &mut ReDbRwTxn); ++ /// daemon-reload or daemon-reexec export all data to database ++ fn reexport(&self, wtxn: &mut ReDbRwTxn); + /// import all data from database + fn import(&self, rtxn: &ReDbRoTxn); +- /// set the ignore flag of data +- fn ignore_set(&self, ignore: bool); ++ /// set the switch flag of data, does switch control whether to use cache or buf ++ fn switch_set(&self, switch: bool); + } + + const RELI_PATH_DIR: &str = "/run/sysmaster/reliability"; +diff --git a/core/lib/rel/history.rs b/core/lib/rel/history.rs +index 2e2e03b..846f8c5 100644 +--- a/core/lib/rel/history.rs ++++ b/core/lib/rel/history.rs +@@ -84,6 +84,21 @@ impl ReliHistory { + db_wtxn.0.commit().expect("history.commit"); + } + ++ /// daemon-reload or daemon-reexec clear db and data reflush to db ++ pub fn reflush(&self) { ++ // create transaction ++ let mut db_wtxn = ReDbRwTxn::new(&self.env).expect("history.write_txn"); ++ ++ // flush to db ++ for (_, db) in self.dbs.borrow().iter() { ++ db.clear(&mut db_wtxn); ++ db.reexport(&mut db_wtxn); ++ } ++ ++ // commit ++ db_wtxn.0.commit().expect("history.commit"); ++ } ++ + pub fn import(&self) { + let db_rtxn = ReDbRoTxn::new(&self.env).expect("history.write_txn"); + +@@ -93,11 +108,11 @@ impl ReliHistory { + } + } + +- pub fn ignore_set(&self, ignore: bool) { +- // set ignore +- *self.ignore.borrow_mut() = ignore; ++ pub fn switch_set(&self, switch: bool) { ++ // set switch ++ *self.ignore.borrow_mut() = switch; + for (_, db) in self.dbs.borrow().iter() { +- db.ignore_set(ignore); ++ db.switch_set(switch); + } + } + +diff --git a/core/lib/rel/station.rs b/core/lib/rel/station.rs +index 10e1bed..b06be4a 100644 +--- a/core/lib/rel/station.rs ++++ b/core/lib/rel/station.rs +@@ -90,6 +90,18 @@ impl ReliStation { + } + } + ++ pub fn db_insert(&self) { ++ // level 1 ++ for station in self.get_kind(ReStationKind::Level1).iter() { ++ station.db_insert(); ++ } ++ ++ // level 2 ++ for station in self.get_kind(ReStationKind::Level2).iter() { ++ station.db_insert(); ++ } ++ } ++ + pub fn make_consistent( + &self, + lframe: Option<(u32, Option, Option)>, +-- +2.33.0 + diff --git a/backport-fix-recycle-the-zombie-of-sysmaster.patch b/backport-fix-recycle-the-zombie-of-sysmaster.patch new file mode 100644 index 0000000..935612b --- /dev/null +++ b/backport-fix-recycle-the-zombie-of-sysmaster.patch @@ -0,0 +1,117 @@ +From aedd0986761b7310321c31496917e93ae47a0d4b Mon Sep 17 00:00:00 2001 +From: huyubiao +Date: Wed, 31 May 2023 01:15:09 +0800 +Subject: [PATCH] fix: recycle the zombie of sysmaster + +--- + exts/init/src/runtime/mod.rs | 18 ++++++------------ + exts/init/src/runtime/signals.rs | 10 ++-------- + exts/sctl/src/main.rs | 2 +- + 3 files changed, 9 insertions(+), 21 deletions(-) + +diff --git a/exts/init/src/runtime/mod.rs b/exts/init/src/runtime/mod.rs +index 11c7a8d..e37ff01 100644 +--- a/exts/init/src/runtime/mod.rs ++++ b/exts/init/src/runtime/mod.rs +@@ -18,7 +18,6 @@ mod timer; + + use comm::{Comm, CommType}; + use epoll::Epoll; +-use libc::signalfd_siginfo; + use nix::errno::Errno; + use nix::libc; + use nix::sys::epoll::EpollEvent; +@@ -190,7 +189,7 @@ impl RunTime { + if let Some(siginfo) = self.signals.read(event)? { + let signo = siginfo.ssi_signo as i32; + match signo { +- _x if self.signals.is_zombie(signo) => self.do_recycle(siginfo), ++ _x if self.signals.is_zombie(signo) => self.do_recycle(), + _x if self.signals.is_restart(signo) => self.do_reexec(), + _x if self.signals.is_unrecover(signo) => self.change_to_unrecover(), + _ => {} +@@ -203,7 +202,7 @@ impl RunTime { + if let Some(siginfo) = self.signals.read(event)? { + let signo = siginfo.ssi_signo as i32; + match signo { +- _x if self.signals.is_zombie(signo) => self.do_recycle(siginfo), ++ _x if self.signals.is_zombie(signo) => self.do_recycle(), + _x if self.signals.is_restart(signo) => self.do_reexec(), + _x if self.signals.is_switch_root(signo) => self.send_switch_root_signal(), + _ => {} +@@ -217,10 +216,9 @@ impl RunTime { + let signo = siginfo.ssi_signo as i32; + match signo { + _x if self.signals.is_zombie(signo) => { ++ self.signals.recycle_zombie(); + if self.is_sysmaster(siginfo.ssi_pid as i32) && self.switching { + self.reexec_self() +- } else { +- self.signals.recycle_zombie(Pid::from_raw(0)) + } + } + _x if self.signals.is_restart(signo) => self.do_recreate(), +@@ -233,8 +231,7 @@ impl RunTime { + fn change_to_unrecover(&mut self) { + println!("change run state to unrecover"); + self.state = InitState::Unrecover; +- // Attempt to recycle the zombie sysmaster. +- self.signals.recycle_zombie(Pid::from_raw(0)); ++ self.signals.recycle_zombie(); + } + + fn do_reexec(&mut self) { +@@ -250,11 +247,8 @@ impl RunTime { + } + } + +- fn do_recycle(&mut self, siginfo: signalfd_siginfo) { +- let pid = siginfo.ssi_pid as i32; +- if !self.is_sysmaster(pid) { +- self.signals.recycle_zombie(Pid::from_raw(pid)); +- } ++ fn do_recycle(&mut self) { ++ self.signals.recycle_zombie(); + } + + fn create_sysmaster(&mut self) -> Result<(), Errno> { +diff --git a/exts/init/src/runtime/signals.rs b/exts/init/src/runtime/signals.rs +index a30c7c2..eee088c 100644 +--- a/exts/init/src/runtime/signals.rs ++++ b/exts/init/src/runtime/signals.rs +@@ -148,17 +148,11 @@ impl Signals { + } + } + +- pub fn recycle_zombie(&mut self, dest_pid: unistd::Pid) { ++ pub fn recycle_zombie(&mut self) { + // peek signal + let flags = WaitPidFlag::WEXITED | WaitPidFlag::WNOHANG | WaitPidFlag::WNOWAIT; + loop { +- // get wait information +- let mut id_flag = Id::All; +- if dest_pid.as_raw() > 0 { +- id_flag = Id::Pid(dest_pid); +- } +- +- let wait_status = match wait::waitid(id_flag, flags) { ++ let wait_status = match wait::waitid(Id::All, flags) { + Ok(status) => status, + Err(_) => return, + }; +diff --git a/exts/sctl/src/main.rs b/exts/sctl/src/main.rs +index f799629..ef360c7 100644 +--- a/exts/sctl/src/main.rs ++++ b/exts/sctl/src/main.rs +@@ -203,7 +203,7 @@ fn main() { + let stream = match TcpStream::connect(&addrs[..]) { + Err(e) => { + eprintln!("Failed to connect to sysmaster: {}", e); +- exit(e.raw_os_error().unwrap() as i32); ++ exit(e.raw_os_error().unwrap()); + } + Ok(v) => v, + }; +-- +2.33.0 + diff --git a/backport-fix-support-for-adjusting-the-memory-usage-of-databa.patch b/backport-fix-support-for-adjusting-the-memory-usage-of-databa.patch new file mode 100644 index 0000000..c020672 --- /dev/null +++ b/backport-fix-support-for-adjusting-the-memory-usage-of-databa.patch @@ -0,0 +1,1306 @@ +From 2c9a53516432d039501bc58652b9f05b57356373 Mon Sep 17 00:00:00 2001 +From: xuxiaozhou1 +Date: Mon, 5 Jun 2023 20:33:00 +0800 +Subject: [PATCH] fix: support for adjusting the memory usage of database + +--- + coms/mount/src/rentry.rs | 12 +-- + coms/service/src/rentry.rs | 12 +-- + coms/socket/src/rentry.rs | 18 ++-- + coms/target/src/rentry.rs | 6 +- + core/bin/job/junit.rs | 34 ++++++-- + core/bin/job/manager.rs | 9 +- + core/bin/job/notify.rs | 6 +- + core/bin/job/stat.rs | 6 +- + core/bin/job/table.rs | 6 +- + core/bin/job/transaction.rs | 10 ++- + core/bin/main.rs | 2 +- + core/bin/manager/config.rs | 5 ++ + core/bin/manager/mod.rs | 11 ++- + core/bin/unit/datastore/child.rs | 14 ++- + core/bin/unit/datastore/deps.rs | 14 ++- + core/bin/unit/datastore/sets.rs | 18 ++-- + core/bin/unit/entry/uentry.rs | 6 +- + core/bin/unit/manager.rs | 5 +- + core/bin/unit/runtime.rs | 14 ++- + core/lib/rel/api.rs | 52 +++++++++-- + core/lib/rel/base.rs | 115 ++++++++++++------------ + core/lib/rel/history.rs | 144 ++++++++++++++++++++++++++----- + core/lib/rel/mod.rs | 2 +- + docs/man/sysmaster.conf.md | 8 ++ + 24 files changed, 376 insertions(+), 153 deletions(-) + mode change 100644 => 100755 core/bin/unit/datastore/child.rs + +diff --git a/coms/mount/src/rentry.rs b/coms/mount/src/rentry.rs +index 45c0ce0..bf0004e 100644 +--- a/coms/mount/src/rentry.rs ++++ b/coms/mount/src/rentry.rs +@@ -103,8 +103,8 @@ impl ReDbTable for MountReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -112,7 +112,7 @@ impl ReDbTable for MountReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } + +@@ -125,8 +125,8 @@ impl ReDbTable for MountReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -134,6 +134,6 @@ impl ReDbTable for MountReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } +diff --git a/coms/service/src/rentry.rs b/coms/service/src/rentry.rs +index a68a3df..7099e01 100644 +--- a/coms/service/src/rentry.rs ++++ b/coms/service/src/rentry.rs +@@ -623,8 +623,8 @@ impl ReDbTable for ServiceReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -632,7 +632,7 @@ impl ReDbTable for ServiceReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } + +@@ -645,8 +645,8 @@ impl ReDbTable for ServiceReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -654,6 +654,6 @@ impl ReDbTable for ServiceReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } +diff --git a/coms/socket/src/rentry.rs b/coms/socket/src/rentry.rs +index 44f5be3..c0f5818 100644 +--- a/coms/socket/src/rentry.rs ++++ b/coms/socket/src/rentry.rs +@@ -335,8 +335,8 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -344,7 +344,7 @@ impl ReDbTable for SocketReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } + +@@ -357,8 +357,8 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -366,7 +366,7 @@ impl ReDbTable for SocketReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } + +@@ -379,8 +379,8 @@ impl ReDbTable for SocketReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -388,6 +388,6 @@ impl ReDbTable for SocketReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } +diff --git a/coms/target/src/rentry.rs b/coms/target/src/rentry.rs +index 9d3f90c..9e218b3 100644 +--- a/coms/target/src/rentry.rs ++++ b/coms/target/src/rentry.rs +@@ -77,8 +77,8 @@ impl ReDbTable for TargetReDb { + self.0.cache_2_db(db_wtxn); + } + +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.0.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.0.buffer_2_db(db_wtxn); + } + + fn import<'a>(&self, db_rtxn: &ReDbRoTxn) { +@@ -86,6 +86,6 @@ impl ReDbTable for TargetReDb { + } + + fn switch_set(&self, switch: bool) { +- self.0.set_switch(switch); ++ self.0.switch_buffer(switch); + } + } +diff --git a/core/bin/job/junit.rs b/core/bin/job/junit.rs +index f1a55b6..b56ec11 100644 +--- a/core/bin/job/junit.rs ++++ b/core/bin/job/junit.rs +@@ -817,11 +817,13 @@ mod tests { + use crate::unit::{JobMode, UnitRe}; + use basic::logger; + use event::Events; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn juv_api_len() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(JobRe::new(&reli)); + let unit_test1 = prepare_unit(&reli); + let mut id: u128 = 0; +@@ -848,7 +850,9 @@ mod tests { + + #[test] + fn juv_api_merge() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let (_, job_start, _, _, _) = prepare_jobs(&reli, &unit_test1, JobMode::Replace); + let (_, stage_start, _, _, _) = prepare_jobs(&reli, &unit_test1, JobMode::Replace); +@@ -881,7 +885,9 @@ mod tests { + + #[test] + fn juv_api_reshuffle() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let (job_nop, job_start, job_reload, job_restart, _) = + prepare_jobs(&reli, &unit_test1, JobMode::Replace); +@@ -923,7 +929,9 @@ mod tests { + + #[test] + fn juv_api_replace_with_unirreversible() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let mode = JobMode::Replace; + let (_, uv_start, _, _, uv_stop) = prepare_jobs(&reli, &unit_test1, mode); +@@ -958,7 +966,9 @@ mod tests { + + #[test] + fn juv_api_replace_with_irreversible() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let mode = JobMode::ReplaceIrreversible; + let (_, uv_start, _, _, uv_stop) = prepare_jobs(&reli, &unit_test1, mode); +@@ -993,7 +1003,9 @@ mod tests { + + #[test] + fn juv_api_order_with_unignore() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let mode = JobMode::Replace; + let (uv_nop, uv_start, _, _, uv_stop) = prepare_jobs(&reli, &unit_test1, mode); +@@ -1093,7 +1105,9 @@ mod tests { + + #[test] + fn juv_api_order_with_ignore() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let mode = JobMode::IgnoreDependencies; + let (uv_nop, uv_start, _, _, uv_stop) = prepare_jobs(&reli, &unit_test1, mode); +@@ -1192,7 +1206,9 @@ mod tests { + + #[test] + fn juv_calc_ready() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let unit_test1 = prepare_unit(&reli); + let (job_nop, job_start, _, _, _) = prepare_jobs(&reli, &unit_test1, JobMode::Replace); + let uv = JobUnit::new(Rc::clone(&unit_test1)); +diff --git a/core/bin/job/manager.rs b/core/bin/job/manager.rs +index 028c4d5..85af037 100755 +--- a/core/bin/job/manager.rs ++++ b/core/bin/job/manager.rs +@@ -814,12 +814,15 @@ mod tests { + use crate::unit::DataManager; + use crate::unit::{UnitRe, UnitRelations}; + use basic::logger; ++ use sysmaster::rel::ReliConf; + + //#[test] + #[allow(dead_code)] + fn job_reli() { + logger::init_log_to_console("test_unit_load", log::LevelFilter::Trace); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let event = Rc::new(Events::new().unwrap()); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); +@@ -1148,7 +1151,9 @@ mod tests { + ) { + let event = Rc::new(Events::new().unwrap()); + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/job/notify.rs b/core/bin/job/notify.rs +index 271b648..8678633 100644 +--- a/core/bin/job/notify.rs ++++ b/core/bin/job/notify.rs +@@ -147,7 +147,7 @@ mod tests { + use crate::unit::UnitX; + + use basic::logger; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn jn_api() { +@@ -178,7 +178,9 @@ mod tests { + + fn prepare_unit_single() -> (Rc, Rc, Rc) { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/job/stat.rs b/core/bin/job/stat.rs +index 1163d00..2711412 100644 +--- a/core/bin/job/stat.rs ++++ b/core/bin/job/stat.rs +@@ -450,7 +450,7 @@ mod tests { + use crate::unit::UnitX; + use basic::logger; + use event::Events; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn js_api() { +@@ -486,7 +486,9 @@ mod tests { + + fn prepare_unit_single() -> (Rc, Rc, Rc) { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/job/table.rs b/core/bin/job/table.rs +index 7b77b1a..5a9f7b2 100644 +--- a/core/bin/job/table.rs ++++ b/core/bin/job/table.rs +@@ -1022,12 +1022,14 @@ mod tests { + use crate::unit::UnitRe; + use basic::logger; + use event::Events; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn job_table_record_suspend() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let job_rentry = Rc::new(JobRe::new(&reli)); +diff --git a/core/bin/job/transaction.rs b/core/bin/job/transaction.rs +index b79e6ff..c4842c4 100644 +--- a/core/bin/job/transaction.rs ++++ b/core/bin/job/transaction.rs +@@ -420,7 +420,7 @@ mod tests { + use crate::unit::{UnitRe, UnitRelations}; + use basic::logger; + use event::Events; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn jt_api_expand_check() {} +@@ -682,7 +682,9 @@ mod tests { + relation: UnitRelations, + ) -> (Rc, Rc, Rc, Rc) { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let name_test1 = String::from("test1.service"); +@@ -699,7 +701,9 @@ mod tests { + + fn prepare_unit_single() -> (Rc, Rc, Rc) { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/main.rs b/core/bin/main.rs +index eb06ed6..0239ce9 100644 +--- a/core/bin/main.rs ++++ b/core/bin/main.rs +@@ -117,10 +117,10 @@ fn main() -> Result<()> { + if !switch { + if !args.deserialize { + manager.debug_clear_restore(); ++ log::info!("debug: clear data restored."); + } + // if switch is false unregister init's reexec signal. + register_reexec_signal(false); +- log::info!("debug: clear data restored."); + } + + manager.setup_cgroup()?; +diff --git a/core/bin/manager/config.rs b/core/bin/manager/config.rs +index b5b2857..8b7aaab 100644 +--- a/core/bin/manager/config.rs ++++ b/core/bin/manager/config.rs +@@ -15,6 +15,7 @@ + use confique::Config; + + pub const SYSTEM_CONFIG: &str = "/etc/sysmaster/system.toml"; ++const RELI_HISTORY_MAPSIZE_DEFAULT: usize = 1048576; // 1M + + #[derive(Config, Debug)] + pub struct ManagerConfig { +@@ -29,6 +30,9 @@ pub struct ManagerConfig { + pub LogTarget: String, + #[config(default = "")] + pub LogFile: String, ++ ++ #[config(default = 1048576)] // RELI_HISTORY_MAPSIZE_DEFAULT ++ pub DbSize: usize, + } + + impl ManagerConfig { +@@ -51,6 +55,7 @@ impl Default for ManagerConfig { + LogLevel: log::LevelFilter::Debug, + LogTarget: "console".to_string(), + LogFile: String::new(), ++ DbSize: RELI_HISTORY_MAPSIZE_DEFAULT, + } + } + } +diff --git a/core/bin/manager/mod.rs b/core/bin/manager/mod.rs +index 17a2e29..ee49efa 100644 +--- a/core/bin/manager/mod.rs ++++ b/core/bin/manager/mod.rs +@@ -40,7 +40,7 @@ use std::collections::HashSet; + use std::path::PathBuf; + use std::rc::Rc; + use sysmaster::error::*; +-use sysmaster::rel::{ReliLastFrame, Reliability}; ++use sysmaster::rel::{ReliConf, ReliLastFrame, Reliability}; + + use alive_timer::AliveTimer; + +@@ -203,7 +203,11 @@ impl Manager { + /// create factory instance + pub fn new(mode: Mode, action: Action, manager_config: Rc) -> Self { + let event = Rc::new(Events::new().unwrap()); +- let reli = Rc::new(Reliability::new(rentry::RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new() ++ .set_map_size(manager_config.DbSize) ++ .set_max_dbs(rentry::RELI_HISTORY_MAX_DBS), ++ )); + let mut l_path = LookupPaths::new(); + l_path.init_lookup_paths(); + let lookup_path = Rc::new(l_path); +@@ -386,6 +390,9 @@ impl Manager { + fn prepare_reexec(&self) -> Result<()> { + // restore external resource, like: fd, ... + // do nothing now ++ ++ // compact db ++ self.reli.compact()?; + Ok(()) + } + +diff --git a/core/bin/unit/datastore/child.rs b/core/bin/unit/datastore/child.rs +old mode 100644 +new mode 100755 +index f4460f0..be29470 +--- a/core/bin/unit/datastore/child.rs ++++ b/core/bin/unit/datastore/child.rs +@@ -167,12 +167,14 @@ mod tests { + use crate::unit::rentry::UnitRe; + use crate::unit::test::test_utils; + use basic::logger; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + #[should_panic] + fn child_add_watch_pid_empty() { +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test3 = String::from("test3.service"); +@@ -185,7 +187,9 @@ mod tests { + #[test] + fn child_add_watch_pid() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +@@ -210,7 +214,9 @@ mod tests { + #[test] + fn child_unwatch_pid() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/unit/datastore/deps.rs b/core/bin/unit/datastore/deps.rs +index 80836ef..cd934b7 100644 +--- a/core/bin/unit/datastore/deps.rs ++++ b/core/bin/unit/datastore/deps.rs +@@ -392,12 +392,14 @@ mod tests { + use crate::unit::data::DataManager; + use crate::unit::test::test_utils; + use basic::logger; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn dep_insert() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let dep = UnitDep::new(&rentry, &Rc::new(sets)); +@@ -440,7 +442,9 @@ mod tests { + #[test] + fn dep_gets_atom() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let dep = UnitDep::new(&rentry, &Rc::new(sets)); +@@ -495,7 +499,9 @@ mod tests { + #[test] + fn dep_is_dep_atom_with() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let dep = UnitDep::new(&rentry, &Rc::new(sets)); +diff --git a/core/bin/unit/datastore/sets.rs b/core/bin/unit/datastore/sets.rs +index 9a94a4b..4e105e7 100644 +--- a/core/bin/unit/datastore/sets.rs ++++ b/core/bin/unit/datastore/sets.rs +@@ -89,12 +89,14 @@ mod tests { + use crate::unit::rentry::UnitRe; + use crate::unit::test::test_utils; + use basic::logger; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn sets_insert() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +@@ -115,7 +117,9 @@ mod tests { + #[test] + fn sets_remove() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +@@ -142,7 +146,9 @@ mod tests { + #[test] + fn sets_get() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +@@ -167,7 +173,9 @@ mod tests { + #[test] + fn sets_getall() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let sets = UnitSets::new(); + let name_test1 = String::from("test1.service"); +diff --git a/core/bin/unit/entry/uentry.rs b/core/bin/unit/entry/uentry.rs +index 0c6e044..5b93e97 100644 +--- a/core/bin/unit/entry/uentry.rs ++++ b/core/bin/unit/entry/uentry.rs +@@ -763,13 +763,15 @@ mod tests { + use crate::unit::test::test_utils::UmIfD; + use basic::{logger, path_lookup::LookupPaths}; + use std::rc::Rc; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + use sysmaster::unit::UnitType; + + use crate::{plugin::Plugin, unit::data::DataManager, unit::util::UnitFile}; + fn unit_init() -> Rc { + logger::init_log_to_console("test_unit_entry", log::LevelFilter::Trace); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + + let mut l_path = LookupPaths::new(); +diff --git a/core/bin/unit/manager.rs b/core/bin/unit/manager.rs +index d5c3e10..505a051 100644 +--- a/core/bin/unit/manager.rs ++++ b/core/bin/unit/manager.rs +@@ -1404,6 +1404,7 @@ mod tests { + use nix::sys::wait::WaitStatus; + use std::thread; + use std::time::Duration; ++ use sysmaster::rel::ReliConf; + use sysmaster::unit::UnitActiveState; + + fn init_dm_for_test() -> (Rc, Rc, Rc) { +@@ -1414,7 +1415,9 @@ mod tests { + + let event = Rc::new(Events::new().unwrap()); + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let state = Rc::new(RefCell::new(State::Init)); + let um = UnitManager::new( + &event, +diff --git a/core/bin/unit/runtime.rs b/core/bin/unit/runtime.rs +index 215a5a7..5c4f45e 100644 +--- a/core/bin/unit/runtime.rs ++++ b/core/bin/unit/runtime.rs +@@ -380,12 +380,14 @@ mod tests { + use crate::unit::rentry::UnitRe; + use crate::unit::test; + use basic::logger; +- use sysmaster::rel::Reliability; ++ use sysmaster::rel::{ReliConf, Reliability}; + + #[test] + fn rt_push_load_queue() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let rt = UnitRT::new(&reli, &rentry, &db); +@@ -412,7 +414,9 @@ mod tests { + #[test] + fn rt_dispatch_load_queue() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let rt = UnitRT::new(&reli, &rentry, &db); +@@ -430,7 +434,9 @@ mod tests { + #[test] + fn rt_dispatch_target_dep_queue() { + let dm = Rc::new(DataManager::new()); +- let reli = Rc::new(Reliability::new(RELI_HISTORY_MAX_DBS)); ++ let reli = Rc::new(Reliability::new( ++ ReliConf::new().set_max_dbs(RELI_HISTORY_MAX_DBS), ++ )); + let rentry = Rc::new(UnitRe::new(&reli)); + let db = Rc::new(UnitDb::new(&rentry)); + let rt = UnitRT::new(&reli, &rentry, &db); +diff --git a/core/lib/rel/api.rs b/core/lib/rel/api.rs +index 60a3543..59e4ac9 100644 +--- a/core/lib/rel/api.rs ++++ b/core/lib/rel/api.rs +@@ -36,6 +36,41 @@ const RELI_DEBUG_PANIC_FILE: &str = "panic.debug"; + const RELI_DEBUG_PFIRST_FILE: &str = "panic_first.debug"; + const RELI_DEBUG_SLEEP_FILE: &str = "sleep.debug"; + ++/// the configuration of reliability instance ++pub struct ReliConf { ++ // history ++ map_size: Option, ++ max_dbs: Option, ++} ++ ++impl ReliConf { ++ /// create the configuration ++ pub fn new() -> ReliConf { ++ ReliConf { ++ map_size: None, ++ max_dbs: None, ++ } ++ } ++ ++ /// set the map size ++ pub fn set_map_size(&mut self, map_size: usize) -> &mut Self { ++ self.map_size = Some(map_size); ++ self ++ } ++ ++ /// set the max numbers of db ++ pub fn set_max_dbs(&mut self, max_dbs: u32) -> &mut Self { ++ self.max_dbs = Some(max_dbs); ++ self ++ } ++} ++ ++impl Default for ReliConf { ++ fn default() -> Self { ++ ReliConf::new() ++ } ++} ++ + /// reliability instance + #[derive(Debug)] + pub struct Reliability { +@@ -62,13 +97,13 @@ impl Drop for Reliability { + + impl Reliability { + /// create reliability instance +- pub fn new(max_db: u32) -> Reliability { ++ pub fn new(conf: &ReliConf) -> Reliability { + let dir_s = reli_prepare().expect("reliability prepare"); + let reli = Reliability { + dir_string: dir_s.clone(), + enable: ReliEnable::new(&dir_s), + last: ReliLast::new(&dir_s), +- history: ReliHistory::new(&dir_s, max_db), ++ history: ReliHistory::new(&dir_s, conf.map_size, conf.max_dbs), + pending: ReliPending::new(&dir_s), + station: ReliStation::new(), + }; +@@ -154,7 +189,7 @@ impl Reliability { + if reload { + // If daemon-reload or daemon-reexec, we need to update all changes, clear db, and submit all changes to db. + self.db_insert(); +- self.history.reflush(); ++ self.history.flush(); + // Due to changes in db, we need to reload the data from db to cache. + self.history.import(); + } +@@ -168,6 +203,11 @@ impl Reliability { + self.last.clear_frame(); + } + ++ /// compact the database ++ pub fn compact(&self) -> Result<()> { ++ self.history.compact() ++ } ++ + /// get the enable flag + pub fn enable(&self) -> bool { + self.enable.enable() +@@ -205,9 +245,9 @@ impl Reliability { + self.last.ignore() + } + +- /// get the ignore flag of history data +- pub fn history_ignore(&self) -> bool { +- self.history.ignore() ++ /// get the switch flag of history data ++ pub fn history_switch(&self) -> bool { ++ self.history.switch() + } + + /// do the debug action: enable the recover process +diff --git a/core/lib/rel/base.rs b/core/lib/rel/base.rs +index 274b504..29200df 100644 +--- a/core/lib/rel/base.rs ++++ b/core/lib/rel/base.rs +@@ -31,13 +31,19 @@ pub struct ReDb { + switch: RefCell, + + // data ++ /* database */ + db: Database, SerdeBincode>, ++ ++ /* cache */ + cache: RefCell>, + add: RefCell>, + del: RefCell>, ++ ++ /* buffer */ ++ buffer: RefCell>, // daemon-reload or daemon-reexec will temporarily store the data here first, and finally refreshes it to db. ++ ++ /* property */ + name: String, +- buf: RefCell>, // daemon-reload or daemon-reexec will temporarily store the data here first, and finally refreshes it to db. +- //_phantom: PhantomData<&'a K>, + } + + impl ReDbTable for ReDb +@@ -53,9 +59,8 @@ where + self.cache_2_db(db_wtxn); + } + +- /// daemon-reload or daemon-reexec export all data to database +- fn reexport(&self, db_wtxn: &mut ReDbRwTxn) { +- self.rebuf_2_db(db_wtxn); ++ fn flush(&self, db_wtxn: &mut ReDbRwTxn) { ++ self.buffer_2_db(db_wtxn); + } + + fn import(&self, db_rtxn: &ReDbRoTxn) { +@@ -63,7 +68,7 @@ where + } + + fn switch_set(&self, switch: bool) { +- self.set_switch(switch); ++ self.switch_buffer(switch); + } + } + +@@ -81,9 +86,8 @@ where + cache: RefCell::new(HashMap::new()), + add: RefCell::new(HashMap::new()), + del: RefCell::new(HashSet::new()), ++ buffer: RefCell::new(HashMap::new()), + name: String::from(db_name), +- buf: RefCell::new(HashMap::new()), +- //_phantom: PhantomData, + } + } + +@@ -95,52 +99,56 @@ where + self.del.borrow_mut().clear(); + } + +- /// set the ignore flag of data +- pub fn set_switch(&self, switch: bool) { ++ /// set the buffer-switch flag of data ++ pub fn switch_buffer(&self, switch: bool) { + *self.switch.borrow_mut() = switch; + } + + /// insert a entry + pub fn insert(&self, k: K, v: V) { + let switch = self.switch(); +- log::debug!( +- "ReDb[{}] switch:{:?} insert, key:{:?}, value:{:?}.", +- &self.name, +- switch, +- &k, +- &v +- ); ++ log::debug!("ReDb[{}] insert, key:{:?}, value:{:?}.", &self.name, &k, &v); ++ log::debug!("insert with switch:{:?}.", switch); + + if switch { +- self.buf.borrow_mut().insert(k, v); +- return; +- } +- +- // remove "del" + insert "add" +- self.del.borrow_mut().remove(&k); +- self.add.borrow_mut().insert(k.clone(), v.clone()); ++ // update buffer only ++ self.buffer.borrow_mut().insert(k, v); ++ } else { ++ // remove "del" + insert "add" ++ self.del.borrow_mut().remove(&k); ++ self.add.borrow_mut().insert(k.clone(), v.clone()); + +- // update cache +- self.cache.borrow_mut().insert(k, v); ++ // update cache ++ self.cache.borrow_mut().insert(k, v); ++ } + } + + /// remove a entry + pub fn remove(&self, k: &K) { +- let n = &self.name; + let switch = self.switch(); +- log::debug!("ReDb[{}] switch:{:?}, remove, key:{:?}.", n, switch, &k); ++ log::debug!("ReDb[{}] remove, key:{:?}.", &self.name, &k); ++ log::debug!("remove with switch:{:?}.", switch); + + if switch { +- self.buf.borrow_mut().remove(k); +- return; +- } ++ // update buffer only ++ self.buffer.borrow_mut().remove(k); ++ } else { ++ // remove "add" + insert "del" ++ self.add.borrow_mut().remove(k); ++ self.del.borrow_mut().insert(k.clone()); + +- // remove "add" + insert "del" +- self.add.borrow_mut().remove(k); +- self.del.borrow_mut().insert(k.clone()); ++ // update cache ++ self.cache.borrow_mut().remove(k); ++ } ++ } + +- // update cache +- self.cache.borrow_mut().remove(k); ++ /// get the existence of the key ++ pub fn contains_key(&self, k: &K) -> bool { ++ if self.switch() { ++ self.buffer.borrow().contains_key(k) ++ } else { ++ self.cache.borrow().contains_key(k) ++ } + } + + /// get a entry +@@ -151,15 +159,6 @@ where + value + } + +- /// get the existence of the key +- pub fn contains_key(&self, k: &K) -> bool { +- if self.switch() { +- return self.buf.borrow().contains_key(k); +- } +- +- self.cache.borrow().contains_key(k) +- } +- + /// get all keys + pub fn keys(&self) -> Vec { + let keys = self +@@ -184,7 +183,7 @@ where + entries + } + +- /// export all data from cache to database ++ /// export changed data from cache to database + pub fn cache_2_db(&self, wtxn: &mut ReDbRwTxn) { + // "add" -> db.put + clear "add" + for (k, v) in self.add.borrow().iter() { +@@ -199,17 +198,19 @@ where + self.del.borrow_mut().clear(); + } + +- /// export all data from cache to database +- pub fn rebuf_2_db(&self, wtxn: &mut ReDbRwTxn) { +- // "buf" -> db.put + clear "buf" +- for (k, v) in self.buf.borrow().iter() { ++ /// flush all data from buffer to database ++ pub fn buffer_2_db(&self, wtxn: &mut ReDbRwTxn) { ++ // clear all data, including "db" and "cache" ++ self.do_clear(wtxn); ++ ++ // "buffer" -> db.put + clear "buffer" ++ for (k, v) in self.buffer.borrow().iter() { + self.db.put(&mut wtxn.0, k, v).expect("history.put"); + } +- +- self.buf.borrow_mut().clear(); ++ self.buffer.borrow_mut().clear(); + } + +- /// emport all data from database to cache ++ /// import all data from database to cache + pub fn db_2_cache(&self, rtxn: &ReDbRoTxn) + where + K: DeserializeOwned, +@@ -257,13 +258,13 @@ impl<'e> ReDbRoTxn<'e> { + pub trait ReDbTable { + /// clear all data + fn clear(&self, wtxn: &mut ReDbRwTxn); +- /// export all data to database ++ /// export the changed data to database + fn export(&self, wtxn: &mut ReDbRwTxn); +- /// daemon-reload or daemon-reexec export all data to database +- fn reexport(&self, wtxn: &mut ReDbRwTxn); ++ /// flush all data to database ++ fn flush(&self, wtxn: &mut ReDbRwTxn); + /// import all data from database + fn import(&self, rtxn: &ReDbRoTxn); +- /// set the switch flag of data, does switch control whether to use cache or buf ++ /// set the switch flag of data, does switch control whether to use cache or buffer + fn switch_set(&self, switch: bool); + } + +diff --git a/core/lib/rel/history.rs b/core/lib/rel/history.rs +index 846f8c5..f9e2146 100644 +--- a/core/lib/rel/history.rs ++++ b/core/lib/rel/history.rs +@@ -12,18 +12,29 @@ + + use super::base::{ReDbRoTxn, ReDbRwTxn, ReDbTable}; + use crate::error::*; +-use heed::{Env, EnvOpenOptions}; ++use heed::{CompactionOption, Env, EnvOpenOptions}; + use std::cell::RefCell; + use std::collections::HashMap; +-use std::path::Path; ++use std::fmt; ++use std::fs::{self, File}; ++use std::path::{Path, PathBuf}; + use std::rc::Rc; +-use std::{fmt, fs}; ++ ++const RELI_HISTORY_A_DIR: &str = "a"; ++const RELI_HISTORY_B_DIR: &str = "b"; ++const RELI_HISTORY_BFLAG_FILE: &str = "b.effect"; + + const RELI_HISTORY_DIR: &str = "history.mdb"; ++const RELI_HISTORY_DATA_FILE: &str = "data.mdb"; ++const RELI_HISTORY_LOCK_FILE: &str = "lock.mdb"; + + pub struct ReliHistory { + // control +- ignore: RefCell, ++ switch: RefCell, ++ ++ // directory ++ b_exist: bool, ++ hdir: String, // home-directory + + // environment + env: Env, +@@ -42,18 +53,19 @@ impl fmt::Debug for ReliHistory { + } + + impl ReliHistory { +- pub fn new(dir_str: &str, max: u32) -> ReliHistory { +- // init environment +- let path = Path::new(dir_str).join(RELI_HISTORY_DIR); +- let env = EnvOpenOptions::new() +- .map_size(10 * 1024 * 1024) +- .max_dbs(max) +- .open(path) +- .unwrap(); ++ pub fn new(dir_str: &str, map_size: Option, max_dbs: Option) -> ReliHistory { ++ // init environment, path: dir/history.mdb/(a|b)/ ++ let history = history_path_get(dir_str); ++ let b_exist = bflag_path_get(history.clone()).exists(); ++ let path = history.join(&subdir_cur_get(b_exist)); ++ let env = open_env(path.clone(), map_size, max_dbs).expect("history open env"); ++ log::info!("history with path {:?} successfully.", path); + + // return + ReliHistory { +- ignore: RefCell::new(false), ++ switch: RefCell::new(false), ++ b_exist, ++ hdir: String::from(dir_str), + env, + dbs: RefCell::new(HashMap::new()), + } +@@ -75,7 +87,7 @@ impl ReliHistory { + // create transaction + let mut db_wtxn = ReDbRwTxn::new(&self.env).expect("history.write_txn"); + +- // flush to db ++ // export to db + for (_, db) in self.dbs.borrow().iter() { + db.export(&mut db_wtxn); + } +@@ -84,15 +96,13 @@ impl ReliHistory { + db_wtxn.0.commit().expect("history.commit"); + } + +- /// daemon-reload or daemon-reexec clear db and data reflush to db +- pub fn reflush(&self) { ++ pub(super) fn flush(&self) { + // create transaction + let mut db_wtxn = ReDbRwTxn::new(&self.env).expect("history.write_txn"); + + // flush to db + for (_, db) in self.dbs.borrow().iter() { +- db.clear(&mut db_wtxn); +- db.reexport(&mut db_wtxn); ++ db.flush(&mut db_wtxn); + } + + // commit +@@ -108,9 +118,48 @@ impl ReliHistory { + } + } + ++ pub(super) fn compact(&self) -> Result<()> { ++ // a -> b or b -> a ++ // prepare next ++ let history = history_path_get(&self.hdir); ++ let next_path = history.join(&subdir_next_get(self.b_exist)); ++ let next_file = next_path.join(RELI_HISTORY_DATA_FILE); ++ ++ // clear next: delete and re-create the whole directory ++ fs::remove_dir_all(next_path.clone()).context(IoSnafu)?; ++ fs::create_dir_all(next_path).context(IoSnafu)?; ++ ++ // copy to next ++ self.env ++ .copy_to_path(next_file.clone(), CompactionOption::Disabled) ++ .context(HeedSnafu)?; ++ log::info!("compact to file {:?} successfully.", next_file); ++ ++ // remark the next flag at last: the another one ++ let bflag = bflag_path_get(history.clone()); ++ if self.b_exist { ++ fs::remove_file(bflag).context(IoSnafu)?; ++ } else { ++ File::create(bflag).context(IoSnafu)?; ++ } ++ ++ // try to clear previous: it would be done in the next re-exec, but we try to delete it as soon as possible. ++ let cur_path = history.join(subdir_cur_get(self.b_exist)); ++ let cur_data = cur_path.join(RELI_HISTORY_DATA_FILE); ++ let cur_lock = cur_path.join(RELI_HISTORY_LOCK_FILE); ++ if let Err(e) = fs::remove_file(cur_data.clone()) { ++ log::error!("remove data file {:?} failed, err = {:?}", cur_data, e); ++ } ++ if let Err(e) = fs::remove_file(cur_lock.clone()) { ++ log::error!("remove lock file {:?} failed, err = {:?}", cur_lock, e); ++ } ++ ++ Ok(()) ++ } ++ + pub fn switch_set(&self, switch: bool) { + // set switch +- *self.ignore.borrow_mut() = switch; ++ *self.switch.borrow_mut() = switch; + for (_, db) in self.dbs.borrow().iter() { + db.switch_set(switch); + } +@@ -124,16 +173,67 @@ impl ReliHistory { + self.dbs.borrow_mut().clear(); + } + +- pub fn ignore(&self) -> bool { +- *self.ignore.borrow() ++ pub fn switch(&self) -> bool { ++ *self.switch.borrow() + } + } + + pub fn prepare(dir_str: &str) -> Result<()> { +- let history = Path::new(dir_str).join(RELI_HISTORY_DIR); ++ // directory ++ let history = history_path_get(dir_str); + if !history.exists() { + fs::create_dir_all(&history).context(IoSnafu)?; + } + ++ // sub-directory ++ let a = history.join(RELI_HISTORY_A_DIR); ++ if !a.exists() { ++ fs::create_dir_all(&a).context(IoSnafu)?; ++ } ++ ++ let b = history.join(RELI_HISTORY_B_DIR); ++ if !b.exists() { ++ fs::create_dir_all(&b).context(IoSnafu)?; ++ } ++ + Ok(()) + } ++ ++fn open_env(path: PathBuf, map_size: Option, max_dbs: Option) -> heed::Result { ++ let mut eoo = EnvOpenOptions::new(); ++ if let Some(size) = map_size { ++ eoo.map_size(size); ++ } ++ if let Some(max) = max_dbs { ++ eoo.max_dbs(max); ++ } ++ eoo.open(path) ++} ++ ++fn subdir_next_get(b_exist: bool) -> String { ++ if b_exist { ++ // b->a ++ String::from(RELI_HISTORY_A_DIR) ++ } else { ++ // a->b ++ String::from(RELI_HISTORY_B_DIR) ++ } ++} ++ ++fn subdir_cur_get(b_exist: bool) -> String { ++ if b_exist { ++ // b ++ String::from(RELI_HISTORY_B_DIR) ++ } else { ++ // a ++ String::from(RELI_HISTORY_A_DIR) ++ } ++} ++ ++fn bflag_path_get(history: PathBuf) -> PathBuf { ++ history.join(RELI_HISTORY_BFLAG_FILE) ++} ++ ++fn history_path_get(dir: &str) -> PathBuf { ++ Path::new(dir).join(RELI_HISTORY_DIR) ++} +diff --git a/core/lib/rel/mod.rs b/core/lib/rel/mod.rs +index 757a74b..07470e4 100644 +--- a/core/lib/rel/mod.rs ++++ b/core/lib/rel/mod.rs +@@ -11,7 +11,7 @@ + // See the Mulan PSL v2 for more details. + + //! reliability module +-pub use api::{reli_debug_enable_switch, reli_debug_get_switch, Reliability}; ++pub use api::{reli_debug_enable_switch, reli_debug_get_switch, ReliConf, Reliability}; + pub use base::{reli_dir_prepare, ReDb, ReDbRoTxn, ReDbRwTxn, ReDbTable}; + use serde::{Deserialize, Serialize}; + pub use station::{ReStation, ReStationKind}; +diff --git a/docs/man/sysmaster.conf.md b/docs/man/sysmaster.conf.md +index 039b543..2b91f34 100644 +--- a/docs/man/sysmaster.conf.md ++++ b/docs/man/sysmaster.conf.md +@@ -19,3 +19,11 @@ sysmaster支持从`/etc/sysmaster/system.toml`中读取系统配置,用于配 + ### LogFile + + 支持配置为`"`括起来的绝对路径,仅当`"LogTarget"`配置为`"file"`时生效。如果配置为空或不配置,将强制修改`LogTarget`为`"console"`。 ++ ++## 外置db配置 ++ ++支持通过`DbSize`等配置调整外置db参量。 ++ ++### DbSize ++ ++DbSize参量支持配置为最大内存占用规格,单位为字节。当配置值小于当前sysmaster所用内存值时,以当前sysmaster所用内存值为准。此配置在系统启动或者daemon-reexec后生效。 +-- +2.33.0 + diff --git a/backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch b/backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch index 88aebf8..8ba0664 100644 --- a/backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch +++ b/backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch @@ -124,7 +124,7 @@ index ef360c7..c6d9281 100644 + let stream = match UnixStream::connect(SCTL_SOCKET) { Err(e) => { eprintln!("Failed to connect to sysmaster: {}", e); - exit(e.raw_os_error().unwrap() as i32); + exit(e.raw_os_error().unwrap()); diff --git a/libs/constants/src/lib.rs b/libs/constants/src/lib.rs index bd3fba2..7c918aa 100644 --- a/libs/constants/src/lib.rs @@ -137,5 +137,5 @@ index bd3fba2..7c918aa 100644 +/// Socket used to transfer message between sysmaster and sctl +pub const SCTL_SOCKET: &str = "/run/sysmaster/sctl"; -- -2.30.2 +2.33.0 diff --git a/sysmaster.spec b/sysmaster.spec index f8550e1..4672e7e 100644 --- a/sysmaster.spec +++ b/sysmaster.spec @@ -11,7 +11,7 @@ Name: sysmaster Version: 0.2.3 -Release: 3 +Release: 4 Summary: redesign and reimplement process1. License: Mulan PSL v2 @@ -35,16 +35,24 @@ Patch6011: backport-fix-fix-some-cargo-clippies.patch Patch6012: backport-fix-compatible-with-rustc-1.60.patch Patch6013: backport-fix-compatible-with-rusc-1.60-lint-check.patch -#Patch6014-Patch6016: https://gitee.com/openeuler/sysmaster/pulls/598 -Patch6014: backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch -Patch6015: backport-feature-get-credential-from-the-connection-and-use-i.patch -Patch6016: backport-typo-fix-some-typoes-no-functional-change.patch +Patch6014: backport-fix-recycle-the-zombie-of-sysmaster.patch -Patch6017: backport-fix-fstab-use-fixed-inotify-version-0.10.0-to-be-com.patch -Patch6018: backport-fix-don-t-mount-all-subsystem-to-every-sys-fs-cgroup.patch -Patch6019: backport-fix-check-if-the-given-unit-name-is-valid-before-we-.patch -Patch6020: backport-fix-really-use-usec.patch -Patch6021: backport-docs-note-that-we-only-support-no-more-than-500-unit.patch +#Patch6015-Patch6017: https://gitee.com/openeuler/sysmaster/pulls/598 +Patch6015: backport-refactor-use-UnixStream-instead-TcpListener-to-get-t.patch +Patch6016: backport-feature-get-credential-from-the-connection-and-use-i.patch +Patch6017: backport-typo-fix-some-typoes-no-functional-change.patch + +Patch6018: backport-fix-fstab-use-fixed-inotify-version-0.10.0-to-be-com.patch +Patch6019: backport-fix-don-t-mount-all-subsystem-to-every-sys-fs-cgroup.patch +Patch6020: backport-fix-check-if-the-given-unit-name-is-valid-before-we-.patch +Patch6021: backport-fix-really-use-usec.patch +Patch6022: backport-docs-note-that-we-only-support-no-more-than-500-unit.patch +Patch6023: backport-fix-inconsistent-data-in-app-memory-and-db-after-dae.patch +Patch6024: backport-fix-.socket-stop-faild-when-.socket-is-in-listening.patch +Patch6025: backport-docs-Add-signal-processing-instructions-for-init.patch +Patch6026: backport-fix-support-for-adjusting-the-memory-usage-of-databa.patch +Patch6027: backport-fix-Clear-buffer-correctly-during-reload-or-reexec.patch +Patch6028: backport-fix-commands-connection-timeout-causing-event-blocki.patch ExclusiveArch: x86_64 aarch64 @@ -99,6 +107,9 @@ install -Dm0640 -t %{sysmaster_install_target}/plugin %{conf_install_source}/plu /usr/lib/sysmaster/* %changelog +* Mon Jun 19 2023 huyubiao - 0.2.3-4 +- sync patches from upstream + * Fri Jun 16 2023 licunlong - 0.2.3-3 - sync patches from upstream