Kea  1.9.9-git
ha_service.cc
Go to the documentation of this file.
1 // Copyright (C) 2018-2021 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
9 #include <command_creator.h>
10 #include <ha_log.h>
11 #include <ha_service.h>
12 #include <ha_service_states.h>
13 #include <cc/command_interpreter.h>
14 #include <cc/data.h>
15 #include <config/timeouts.h>
16 #include <dhcp/iface_mgr.h>
17 #include <dhcpsrv/cfgmgr.h>
18 #include <dhcpsrv/lease_mgr.h>
20 #include <http/date_time.h>
21 #include <http/response_json.h>
22 #include <http/post_request_json.h>
24 #include <util/stopwatch.h>
25 #include <boost/pointer_cast.hpp>
26 #include <boost/make_shared.hpp>
27 #include <boost/weak_ptr.hpp>
28 #include <functional>
29 #include <sstream>
30 
31 using namespace isc::asiolink;
32 using namespace isc::config;
33 using namespace isc::data;
34 using namespace isc::dhcp;
35 using namespace isc::hooks;
36 using namespace isc::http;
37 using namespace isc::log;
38 using namespace isc::util;
39 namespace ph = std::placeholders;
40 
41 namespace isc {
42 namespace ha {
43 
44 const int HAService::HA_HEARTBEAT_COMPLETE_EVT;
45 const int HAService::HA_LEASE_UPDATES_COMPLETE_EVT;
46 const int HAService::HA_SYNCING_FAILED_EVT;
47 const int HAService::HA_SYNCING_SUCCEEDED_EVT;
48 const int HAService::HA_MAINTENANCE_NOTIFY_EVT;
49 const int HAService::HA_MAINTENANCE_START_EVT;
50 const int HAService::HA_MAINTENANCE_CANCEL_EVT;
51 const int HAService::HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED;
52 
// Constructs the HA service.
//
// Stores the supplied IO service, network state, configuration and server
// type, and sizes the lease update backlog from
// config->getDelayedUpdatesLimit(). It then resets the network state for the
// HA_COMMAND origin and creates the HTTP client: a client with zero threads
// when multi-threading is disabled, otherwise a client using
// config_->getHttpClientThreads(). In the multi-threaded case, and only when
// a dedicated listener is configured, a CmdHttpListener is created on the
// address and port taken from this server's URL.
//
// Parameters:
//   io_service    - IO service handed to the HTTP client.
//   network_state - object whose HA_COMMAND origin is reset here.
//   config        - HA configuration.
//   server_type   - selects the DHCPv4 or the other branch below.
//
// Throws Unexpected when a dedicated listener is requested and the hostname
// of this server's URL is not a valid IP address.
//
// NOTE(review): the listing numbers skip inside this body (60->62, 63->65,
// 68->70, 102->104), so some statements appear to be missing from this copy,
// including the bodies of the server_type branches and the head of the final
// .arg(...) chain. Confirm against the upstream file.
53 HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state,
54  const HAConfigPtr& config, const HAServerType& server_type)
55  : io_service_(io_service), network_state_(network_state), config_(config),
56  server_type_(server_type), client_(), listener_(), communication_state_(),
57  query_filter_(config), mutex_(), pending_requests_(),
58  lease_update_backlog_(config->getDelayedUpdatesLimit()) {
59 
60  if (server_type == HAServerType::DHCPv4) {
62 
63  } else {
65  }
66 
67  network_state_->reset(NetworkState::Origin::HA_COMMAND);
68 
70 
71  // Create the client and/or listener as appropriate.
72  if (!config_->getEnableMultiThreading()) {
73  // Not configured for multi-threading, start a client in ST mode.
74  client_.reset(new HttpClient(*io_service_, 0));
75  } else {
76  // Create an MT-mode client.
77  client_.reset(new HttpClient(*io_service_,
78  config_->getHttpClientThreads(), true));
79 
80  // If we're configured to use our own listener create and start it.
81  if (config_->getHttpDedicatedListener()) {
82  // Get the server address and port from this server's URL.
83  auto my_url = config_->getThisServerConfig()->getUrl();
84  IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
85  try {
86  // Since we do not currently support hostname resolution,
87  // we need to make sure we have an IP address here.
88  server_address = IOAddress(my_url.getStrippedHostname());
89  } catch (const std::exception& ex) {
// Any failure to build the address is reported as Unexpected; the
// caught exception's own message is not included in the new one.
90  isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
91  << " is not a valid IP address");
92  }
93 
94  // Fetch how many threads the listener will use.
95  uint32_t listener_threads = config_->getHttpListenerThreads();
96 
97  // Instantiate the listener.
98  listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
99  listener_threads));
100  }
101  }
102 
104  .arg(HAConfig::HAModeToString(config->getHAMode()))
105  .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
106 }
107 
109  // Stop client and/or listener.
111 
112  network_state_->reset(NetworkState::Origin::HA_COMMAND);
113 }
114 
115 void
117  StateModel::defineEvents();
118 
119  defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
120  defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
121  defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
122  defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
123  defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
124  defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
125  defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
126 }
127 
128 void
130  StateModel::verifyEvents();
131 
139 }
140 
141 void
143  StateModel::defineStates();
144 
146  std::bind(&HAService::backupStateHandler, this),
147  config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
148 
150  std::bind(&HAService::communicationRecoveryHandler, this),
151  config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
152 
154  std::bind(&HAService::normalStateHandler, this),
155  config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
156 
158  std::bind(&HAService::normalStateHandler, this),
159  config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
160 
162  std::bind(&HAService::inMaintenanceStateHandler, this),
163  config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
164 
166  std::bind(&HAService::partnerDownStateHandler, this),
167  config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
168 
171  config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
172 
174  std::bind(&HAService::passiveBackupStateHandler, this),
175  config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
176 
178  std::bind(&HAService::readyStateHandler, this),
179  config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
180 
182  std::bind(&HAService::syncingStateHandler, this),
183  config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
184 
186  std::bind(&HAService::terminatedStateHandler, this),
187  config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
188 
190  std::bind(&HAService::waitingStateHandler, this),
191  config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
192 }
193 
194 void
196  if (doOnEntry()) {
199 
200  // Log if the state machine is paused.
202  }
203 
204  // There is nothing to do in that state. This server simply receives
205  // lease updates from the partners.
207 }
208 
209 void
211  if (doOnEntry()) {
214 
215  // Log if the state machine is paused.
217  }
218 
220 
223 
224  // Check if the clock skew is still acceptable. If not, transition to
225  // the terminated state.
226  } else if (shouldTerminate()) {
228 
229  } else if (isPartnerStateInvalid()) {
231 
232  } else {
233 
234  // Transitions based on the partner's state.
235  switch (communication_state_->getPartnerState()) {
238  break;
239 
240  case HA_PARTNER_DOWN_ST:
242  break;
243 
246  break;
247 
248  case HA_TERMINATED_ST:
250  break;
251 
252  case HA_UNAVAILABLE_ST:
253  if (shouldPartnerDown()) {
255 
256  } else {
258  }
259  break;
260 
261  case HA_WAITING_ST:
262  case HA_SYNCING_ST:
263  case HA_READY_ST:
264  // The partner seems to be waking up, perhaps after communication-recovery.
265  // If our backlog queue is overflown we need to synchronize our lease database.
266  // There is no need to send ha-reset to the partner because the partner is
267  // already synchronizing its lease database.
268  if (!communication_state_->isCommunicationInterrupted() &&
271  } else {
272  // Backlog was not overflown, so there is no need to synchronize our
273  // lease database. Let's wait until our partner completes synchronization
274  // and transitions to the load-balancing state.
276  }
277  break;
278 
279  default:
280  // If the communication is still interrupted, let's continue sitting
281  // in this state until it is resumed or until the transition to the
282  // partner-down state, depending on what happens first.
283  if (communication_state_->isCommunicationInterrupted()) {
285  break;
286  }
287 
288  // The communication has been resumed. The partner server must be in a state
289  // in which it can receive outstanding lease updates we collected. The number of
290  // outstanding lease updates must not exceed the configured limit. Finally, the
291  // lease updates must be successfully sent. If that all works, we will transition
292  // to the normal operation.
293  if ((communication_state_->getPartnerState() == getNormalState()) ||
294  (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
296  // If our lease backlog was overflown or we were unable to send lease
297  // updates to the partner we should notify the partner that it should
298  // synchronize the lease database. We do it by sending ha-reset command.
299  if (sendHAReset()) {
301  }
302  break;
303  }
304  // The backlog was not overflown and we successfully sent our lease updates.
305  // We can now transition to the normal operation state. If the partner
306  // fails to send his outstanding lease updates to us it should send the
307  // ha-reset command to us.
309  break;
310  }
311 
312  // The partner appears to be in unexpected state, we have exceeded the number
313  // of lease updates in a backlog or an attempt to send lease updates failed.
314  // In all these cases we follow plan B and transition to the waiting state.
315  // The server will then attempt to synchronize the entire lease database.
317  }
318  }
319 
320  // When exiting this state we must ensure that lease updates backlog is cleared.
321  if (doOnExit()) {
323  }
324 }
325 
326 void
328  // If we are transitioning from another state, we have to define new
329  // serving scopes appropriate for the new state. We don't do it if
330  // we remain in this state.
331  if (doOnEntry()) {
334 
335  // Log if the state machine is paused.
337  }
338 
340 
343  return;
344  }
345 
346  // Check if the clock skew is still acceptable. If not, transition to
347  // the terminated state.
348  if (shouldTerminate()) {
350  return;
351  }
352 
353  // Check if the partner state is valid per current configuration. If it is
354  // in an invalid state let's transition to the waiting state and stay there
355  // until the configuration is corrected.
356  if (isPartnerStateInvalid()) {
358  return;
359  }
360 
361  switch (communication_state_->getPartnerState()) {
364  break;
365 
366  case HA_PARTNER_DOWN_ST:
368  break;
369 
372  break;
373 
374  case HA_TERMINATED_ST:
376  break;
377 
378  case HA_UNAVAILABLE_ST:
379  if (shouldPartnerDown()) {
381 
382  } else if (config_->amAllowingCommRecovery()) {
384 
385  } else {
387  }
388  break;
389 
390  default:
392  }
393 
394  if (doOnExit()) {
395  // Do nothing here but doOnExit() call clears the "on exit" flag
396  // when transitioning to the communication-recovery state. In that
397  // state we need this flag to be cleared.
398  }
399 }
400 
401 void
403  // If we are transitioning from another state, we have to define new
404  // serving scopes appropriate for the new state. We don't do it if
405  // we remain in this state.
406  if (doOnEntry()) {
407  // In this state the server remains silent and waits for being
408  // shutdown.
411 
412  // Log if the state machine is paused.
414 
416  }
417 
419 
420  // We don't transition out of this state unless explicitly mandated
421  // by the administrator via a dedicated command which cancels
422  // the maintenance.
424 }
425 
426 void
428  // If we are transitioning from another state, we have to define new
429  // serving scopes appropriate for the new state. We don't do it if
430  // we remain in this state.
431  if (doOnEntry()) {
432 
433  bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
434 
435  // It may be administratively disabled to handle partner's scope
436  // in case of failure. If this is the case we'll just handle our
437  // default scope (or no scope at all). The user will need to
438  // manually enable this server to handle partner's scope.
439  // If we're in the maintenance mode we serve all scopes because
440  // it is not a failover situation.
441  if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
443  } else {
445  }
447 
448  // Log if the state machine is paused.
450 
451  if (maintenance) {
452  // If we ended up in the partner-down state as a result of
453  // receiving the ha-maintenance-start command let's log it.
455  }
456  }
457 
459 
462  return;
463  }
464 
465  // Check if the clock skew is still acceptable. If not, transition to
466  // the terminated state.
467  if (shouldTerminate()) {
469  return;
470  }
471 
472  // Check if the partner state is valid per current configuration. If it is
473  // in an invalid state let's transition to the waiting state and stay there
474  // until the configuration is corrected.
475  if (isPartnerStateInvalid()) {
477  return;
478  }
479 
480  switch (communication_state_->getPartnerState()) {
481  case HA_HOT_STANDBY_ST:
484  case HA_PARTNER_DOWN_ST:
487  break;
488 
489  case HA_READY_ST:
492  break;
493 
494  case HA_TERMINATED_ST:
496  break;
497 
498  default:
500  }
501 }
502 
503 void
505  // If we are transitioning from another state, we have to define new
506  // serving scopes appropriate for the new state. We don't do it if
507  // we remain in this state.
508  if (doOnEntry()) {
510 
512 
513  // Log if the state machine is paused.
515 
517  }
518 
520 
521  if (isModelPaused()) {
523  return;
524  }
525 
526  // Check if the clock skew is still acceptable. If not, transition to
527  // the terminated state.
528  if (shouldTerminate()) {
530  return;
531  }
532 
533  switch (communication_state_->getPartnerState()) {
534  case HA_UNAVAILABLE_ST:
536  break;
537  default:
539  }
540 }
541 
542 void
544  // If we are transitioning from another state, we have to define new
545  // serving scopes appropriate for the new state. We don't do it if
546  // we remain in this state.
547  if (doOnEntry()) {
550 
551  // In the passive-backup state we don't send heartbeat.
552  communication_state_->stopHeartbeat();
553 
554  // Log if the state machine is paused.
556  }
558 }
559 
560 void
562  // If we are transitioning from another state, we have to define new
563  // serving scopes appropriate for the new state. We don't do it if
564  // we remain in this state.
565  if (doOnEntry()) {
568 
569  // Log if the state machine is paused.
571  }
572 
574 
577  return;
578  }
579 
580  // Check if the clock skew is still acceptable. If not, transition to
581  // the terminated state.
582  if (shouldTerminate()) {
584  return;
585  }
586 
587  // Check if the partner state is valid per current configuration. If it is
588  // in an invalid state let's transition to the waiting state and stay there
589  // until the configuration is corrected.
590  if (isPartnerStateInvalid()) {
592  return;
593  }
594 
595  switch (communication_state_->getPartnerState()) {
596  case HA_HOT_STANDBY_ST:
600  break;
601 
604  break;
605 
608  break;
609 
610  case HA_READY_ST:
611  // If both servers are ready, the primary server "wins" and is
612  // transitioned first.
613  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
616  } else {
618  }
619  break;
620 
621  case HA_TERMINATED_ST:
623  break;
624 
625  case HA_UNAVAILABLE_ST:
626  if (shouldPartnerDown()) {
628 
629  } else {
631  }
632  break;
633 
634  default:
636  }
637 }
638 
639 void
641  // If we are transitioning from another state, we have to define new
642  // serving scopes appropriate for the new state. We don't do it if
643  // we remain in this state.
644  if (doOnEntry()) {
647 
648  // Log if the state machine is paused.
650  }
651 
654  return;
655  }
656 
657  // Check if the clock skew is still acceptable. If not, transition to
658  // the terminated state.
659  if (shouldTerminate()) {
661  return;
662  }
663 
664  // Check if the partner state is valid per current configuration. If it is
665  // in an invalid state let's transition to the waiting state and stay there
666  // until the configuration is corrected.
667  if (isPartnerStateInvalid()) {
669  return;
670  }
671 
672  // We don't want to perform synchronous attempt to synchronize with
673  // a partner until we know that the partner is responding. Therefore,
674  // we wait for the heartbeat to complete successfully before we
675  // initiate the synchronization.
676  switch (communication_state_->getPartnerState()) {
677  case HA_TERMINATED_ST:
679  return;
680 
681  case HA_UNAVAILABLE_ST:
682  // If the partner appears to be offline, let's transition to the partner
683  // down state. Otherwise, we'd be stuck trying to synchronize with a
684  // dead partner.
685  if (shouldPartnerDown()) {
687 
688  } else {
690  }
691  break;
692 
693  default:
694  // We don't want the heartbeat to interfere with the synchronization,
695  // so let's temporarily stop it.
696  communication_state_->stopHeartbeat();
697 
698  // Timeout is configured in milliseconds. Need to convert to seconds.
699  unsigned int dhcp_disable_timeout =
700  static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
701  if (dhcp_disable_timeout == 0) {
702  ++dhcp_disable_timeout;
703  }
704 
705  // Perform synchronous leases update.
706  std::string status_message;
707  int sync_status = synchronize(status_message,
708  config_->getFailoverPeerConfig()->getName(),
709  dhcp_disable_timeout);
710 
711  // If the leases synchronization was successful, let's transition
712  // to the ready state.
713  if (sync_status == CONTROL_RESULT_SUCCESS) {
715 
716  } else {
717  // If the synchronization was unsuccessful we're back to the
718  // situation that the partner is unavailable and therefore
719  // we stay in the syncing state.
721  }
722  }
723 
724  // Make sure that the heartbeat is re-enabled.
726 }
727 
728 void
730  // If we are transitioning from another state, we have to define new
731  // serving scopes appropriate for the new state. We don't do it if
732  // we remain in this state.
733  if (doOnEntry()) {
736 
737  // In the terminated state we don't send heartbeat.
738  communication_state_->stopHeartbeat();
739 
740  // Log if the state machine is paused.
742 
744  }
745 
747 }
748 
749 void
751  // If we are transitioning from another state, we have to define new
752  // serving scopes appropriate for the new state. We don't do it if
753  // we remain in this state.
754  if (doOnEntry()) {
757 
758  // Log if the state machine is paused.
760  }
761 
762  // Only schedule the heartbeat for non-backup servers.
763  if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
764  (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
766  }
767 
770  return;
771  }
772 
773  // Backup server must remain in its own state.
774  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
776  return;
777  }
778 
779  // We're not a backup server, so we're either primary or secondary. If this is
780  // a passive-backup mode of operation, we're primary and we should transition
781  // to the passive-backup state.
782  if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
784  return;
785  }
786 
787  // Check if the clock skew is still acceptable. If not, transition to
788  // the terminated state.
789  if (shouldTerminate()) {
791  return;
792  }
793 
794  // Check if the partner state is valid per current configuration. If it is
795  // in an invalid state let's sit in the waiting state until the configuration
796  // is corrected.
797  if (isPartnerStateInvalid()) {
799  return;
800  }
801 
802  switch (communication_state_->getPartnerState()) {
804  case HA_HOT_STANDBY_ST:
807  case HA_PARTNER_DOWN_ST:
809  case HA_READY_ST:
810  // If we're configured to not synchronize lease database, proceed directly
811  // to the "ready" state.
812  verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
813  break;
814 
815  case HA_SYNCING_ST:
817  break;
818 
819  case HA_TERMINATED_ST:
820  // We have checked above whether the clock skew is exceeding the threshold
821  // and we should terminate. If we're here, it means that the clock skew
822  // is acceptable. The partner may be still in the terminated state because
823  // it hasn't been restarted yet. Probably, this server is the first one
824  // being restarted after syncing the clocks. Let's just sit in the waiting
825  // state until the partner gets restarted.
828  break;
829 
830  case HA_WAITING_ST:
831  // If both servers are waiting, the primary server 'wins' and is
832  // transitioned to the next state first.
833  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
834  // If we're configured to not synchronize lease database, proceed directly
835  // to the "ready" state.
836  verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
837 
838  } else {
840  }
841  break;
842 
843  case HA_UNAVAILABLE_ST:
844  if (shouldPartnerDown()) {
846 
847  } else {
849  }
850  break;
851 
852  default:
854  }
855 }
856 
// Transitions the state machine to the given state.
//
// Builds upper-case labels for the current state, the new state and (outside
// the passive-backup mode) the partner's state, then calls
// transition(state, getNextEvent()). After the transition, for servers that
// are neither in passive-backup mode nor configured as a backup, it
// distinguishes three cases about lease updates: they are sent, they are
// administratively disabled, or they are enabled but not issued in the new
// state.
//
// Parameters:
//   state - identifier of the state to transition to.
//
// NOTE(review): the listing numbers skip before each dangling .arg(...) chain
// (875, 882, 892, 904, 909, 916), so the logging statements those chains
// belong to appear to be missing from this copy. Confirm against the upstream
// file.
857 void
858 HAService::verboseTransition(const unsigned state) {
859  // Get current and new state name.
860  std::string current_state_name = getStateLabel(getCurrState());
861  std::string new_state_name = getStateLabel(state);
862 
863  // Turn them to upper case so that they are more visible in the logs.
864  boost::to_upper(current_state_name);
865  boost::to_upper(new_state_name);
866 
867  if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
868  // If this is load-balancing or hot-standby mode we also want to log
869  // partner's state.
870  auto partner_state = communication_state_->getPartnerState();
871  std::string partner_state_name = getStateLabel(partner_state);
872  boost::to_upper(partner_state_name);
873 
874  // Log the transition.
876  .arg(current_state_name)
877  .arg(new_state_name)
878  .arg(partner_state_name);
879 
880  } else {
881  // In the passive-backup mode we don't know the partner's state.
883  .arg(current_state_name)
884  .arg(new_state_name);
885  }
886 
887  // If we're transitioning directly from the "waiting" to "ready"
888  // state it indicates that the database synchronization is
889  // administratively disabled. Let's remind the user about this
890  // configuration setting.
891  if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
893  }
894 
895  // Do the actual transition.
896  transition(state, getNextEvent());
897 
898  // Inform the administrator whether or not lease updates are generated.
899  // Updates are never generated by a backup server so it doesn't make
900  // sense to log anything for the backup server.
901  if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
902  (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
903  if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
905  .arg(new_state_name);
906 
907  } else if (!config_->amSendingLeaseUpdates()) {
908  // Lease updates are administratively disabled.
910  .arg(new_state_name);
911 
912  } else {
913  // Lease updates are not administratively disabled, but they
914  // are not issued because this is the backup server or because
915  // in this state the server should not generate lease updates.
917  .arg(new_state_name);
918  }
919  }
920 }
921 
922 int
924  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
925  return (HA_BACKUP_ST);
926  }
927 
928  switch (config_->getHAMode()) {
930  return (HA_LOAD_BALANCING_ST);
932  return (HA_HOT_STANDBY_ST);
933  default:
934  return (HA_PASSIVE_BACKUP_ST);
935  }
936 }
937 
938 bool
940  if (isModelPaused()) {
942  unpauseModel();
943  return (true);
944  }
945  return (false);
946 }
947 
948 void
950  // Inform the administrator if the state machine is paused.
951  if (isModelPaused()) {
952  std::string state_name = stateToString(getCurrState());
953  boost::to_upper(state_name);
955  .arg(state_name);
956  }
957 }
958 
959 void
962 }
963 
964 bool
966  return (inScopeInternal(query4));
967 }
968 
969 bool
971  return (inScopeInternal(query6));
972 }
973 
974 template<typename QueryPtrType>
975 bool
976 HAService::inScopeInternal(QueryPtrType& query) {
977  // Check if the query is in scope (should be processed by this server).
978  std::string scope_class;
979  const bool in_scope = query_filter_.inScope(query, scope_class);
980  // Whether or not the query is going to be processed by this server,
981  // we associate the query with the appropriate class.
982  query->addClass(dhcp::ClientClass(scope_class));
983  // The following is the part of the server failure detection algorithm.
984  // If the query should be processed by the partner we need to check if
985  // the partner responds. If the number of unanswered queries exceeds a
986  // configured threshold, we will consider the partner to be offline.
987  if (!in_scope && communication_state_->isCommunicationInterrupted()) {
988  communication_state_->analyzeMessage(query);
989  }
990  // Indicate if the query is in scope.
991  return (in_scope);
992 }
993 
994 void
996  std::string current_state_name = getStateLabel(getCurrState());
997  boost::to_upper(current_state_name);
998 
999  // DHCP service should be enabled in the following states.
1000  const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1007 
1008  if (!should_enable && network_state_->isServiceEnabled()) {
1009  std::string current_state_name = getStateLabel(getCurrState());
1010  boost::to_upper(current_state_name);
1012  .arg(config_->getThisServerName())
1013  .arg(current_state_name);
1014  network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1015 
1016  } else if (should_enable && !network_state_->isServiceEnabled()) {
1017  std::string current_state_name = getStateLabel(getCurrState());
1018  boost::to_upper(current_state_name);
1020  .arg(config_->getThisServerName())
1021  .arg(current_state_name);
1022  network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1023  }
1024 }
1025 
1026 bool
1028  // Checking whether the communication with the partner is OK is the
1029  // first step towards verifying if the server is up.
1030  if (communication_state_->isCommunicationInterrupted()) {
1031  // If the communication is interrupted, we also have to check
1032  // whether the partner answers DHCP requests. The only cases
1033  // when we don't (can't) do it are: the hot standby configuration
1034  // in which this server is a primary and when the DHCP service is
1035  // disabled so we can't analyze incoming traffic. Note that the
1036  // primary server can't check delayed responses to the partner
1037  // because the partner doesn't respond to any queries in this
1038  // configuration.
1039  if (network_state_->isServiceEnabled() &&
1040  ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1041  (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1042  return (communication_state_->failureDetected());
1043  }
1044 
1045  // Hot standby / primary case.
1046  return (true);
1047  }
1048 
1049  // Shouldn't transition to the partner down state.
1050  return (false);
1051 }
1052 
1053 bool
1055  // Check if skew is fatally large.
1056  bool should_terminate = communication_state_->clockSkewShouldTerminate();
1057 
1058  // If not, issue a warning if it's getting large.
1059  if (!should_terminate) {
1060  communication_state_->clockSkewShouldWarn();
1061  }
1062 
1063  return (should_terminate);
1064 }
1065 
1066 bool
1069 }
1070 
1071 bool
1073  switch (communication_state_->getPartnerState()) {
1075  if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1077  return (true);
1078  }
1079  break;
1080 
1081  case HA_HOT_STANDBY_ST:
1082  if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1084  return (true);
1085  }
1086  break;
1087 
1088  case HA_LOAD_BALANCING_ST:
1089  if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1091  return (true);
1092  }
1093  break;
1094 
1095  default:
1096  ;
1097  }
1098  return (false);
1099 }
1100 
1101 size_t
1103  const dhcp::Lease4CollectionPtr& leases,
1104  const dhcp::Lease4CollectionPtr& deleted_leases,
1105  const hooks::ParkingLotHandlePtr& parking_lot) {
1106 
1107  // Get configurations of the peers. Exclude this instance.
1108  HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1109 
1110  size_t sent_num = 0;
1111 
1112  // Schedule sending lease updates to each peer.
1113  for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1114  HAConfig::PeerConfigPtr conf = p->second;
1115 
1116  // Check if the lease updates should be queued. This is the case when the
1117  // server is in the communication-recovery state. Queued lease updates may
1118  // be sent when the communication is re-established.
1119  if (shouldQueueLeaseUpdates(conf)) {
1120  // Lease updates for deleted leases.
1121  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1123  }
1124 
1125  // Lease updates for new allocations and updated leases.
1126  for (auto l = leases->begin(); l != leases->end(); ++l) {
1128  }
1129 
1130  continue;
1131  }
1132 
1133  // Check if the lease update should be sent to the server. If we're in
1134  // the partner-down state we don't send lease updates to the partner.
1135  if (!shouldSendLeaseUpdates(conf)) {
1136  continue;
1137  }
1138 
1139  // Lease updates for deleted leases.
1140  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1142  parking_lot);
1143  }
1144 
1145  // Lease updates for new allocations and updated leases.
1146  for (auto l = leases->begin(); l != leases->end(); ++l) {
1148  parking_lot);
1149  }
1150 
1151  // If we're contacting a backup server from which we don't expect a
1152  // response prior to responding to the DHCP client we don't count
1153  // it.
1154  if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1155  ++sent_num;
1156  }
1157  }
1158 
1159  return (sent_num);
1160 }
1161 
// Schedules lease updates resulting from a DHCPv6 exchange to every other
// configured server and reports how many acknowledgments must be awaited.
//
// For each peer returned by getOtherServersConfig() the updates are either
// queued (shouldQueueLeaseUpdates() is true), skipped
// (shouldSendLeaseUpdates() is false) or sent in one command built by
// CommandCreator::createLease6BulkApply().
//
// leases          new allocations and updated leases.
// deleted_leases  leases removed as a result of the exchange.
// parking_lot     handle passed on to asyncSendLeaseUpdate().
//
// Returns sent_num: the number of contacted peers that are not backups, or
// all contacted peers when amWaitingBackupAck() is true.
//
// NOTE(review): the embedded listing numbers skip 1163, 1182 and 1187, so
// the line with the function name and its first parameter and the bodies
// of both queueing loops are not visible here - confirm against upstream.
1162 size_t
1164  const dhcp::Lease6CollectionPtr& leases,
1165  const dhcp::Lease6CollectionPtr& deleted_leases,
1166  const hooks::ParkingLotHandlePtr& parking_lot) {
1167 
1168  // Get configurations of the peers. Exclude this instance.
1169  HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1170 
1171  size_t sent_num = 0;
1172 
1173  // Schedule sending lease updates to each peer.
1174  for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1175  HAConfig::PeerConfigPtr conf = p->second;
1176 
1177  // Check if the lease updates should be queued. This is the case when the
1178  // server is in the communication-recovery state. Queued lease updates may
1179  // be sent when the communication is re-established.
1180  if (shouldQueueLeaseUpdates(conf)) {
  // Deleted leases are walked first, then new and updated ones.
1181  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1183  }
1184 
1185  // Lease updates for new allocations and updated leases.
1186  for (auto l = leases->begin(); l != leases->end(); ++l) {
1188  }
1189 
  // Queued updates are not sent now and are not counted in sent_num.
1190  continue;
1191  }
1192 
1193  // Check if the lease update should be sent to the server. If we're in
1194  // the partner-down state we don't send lease updates to the partner.
1195  if (!shouldSendLeaseUpdates(conf)) {
1196  continue;
1197  }
1198 
1199  // If we're contacting a backup server from which we don't expect a
1200  // response prior to responding to the DHCP client we don't count
1201  // it.
1202  if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1203  ++sent_num;
1204  }
1205 
1206  // Send new/updated leases and deleted leases in one command.
1207  asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1208  parking_lot);
1209  }
1210 
1211  return (sent_num);
1212 }
1213 
// Thread-aware entry point for recording that one lease update for the
// given query has completed.
//
// Delegates to leaseUpdateCompleteInternal(); when
// MultiThreadingMgr::instance().getMode() is true the call is made while
// holding mutex_.
//
// Returns whatever leaseUpdateCompleteInternal() returns.
//
// NOTE(review): the embedded listing numbers skip 1216, so the line with the
// function name and the query parameter is not visible here.
1214 template<typename QueryPtrType>
1215 bool
1217  const ParkingLotHandlePtr& parking_lot) {
1218  if (MultiThreadingMgr::instance().getMode()) {
1219  std::lock_guard<std::mutex> lock(mutex_);
1220  return (leaseUpdateCompleteInternal(query, parking_lot));
1221  } else {
1222  return (leaseUpdateCompleteInternal(query, parking_lot));
1223  }
1224 }
1225 
1226 template<typename QueryPtrType>
1227 bool
1228 HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1229  const ParkingLotHandlePtr& parking_lot) {
1230  auto it = pending_requests_.find(query);
1231 
1232  // If there are no more pending requests for this query, let's unpark
1233  // the DHCP packet.
1234  if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1235  parking_lot->unpark(query);
1236 
1237  // If we have unparked the packet we can clear pending requests for
1238  // this query.
1239  if (it != pending_requests_.end()) {
1240  pending_requests_.erase(it);
1241  }
1242  return (true);
1243  }
1244  return (false);
1245 }
1246 
1247 template<typename QueryPtrType>
1248 void
1249 HAService::updatePendingRequest(QueryPtrType& query) {
1250  if (MultiThreadingMgr::instance().getMode()) {
1251  std::lock_guard<std::mutex> lock(mutex_);
1252  updatePendingRequestInternal(query);
1253  } else {
1254  updatePendingRequestInternal(query);
1255  }
1256 }
1257 
1258 template<typename QueryPtrType>
1259 void
1260 HAService::updatePendingRequestInternal(QueryPtrType& query) {
1261  if (pending_requests_.count(query) == 0) {
1262  pending_requests_[query] = 1;
1263  } else {
1264  ++pending_requests_[query];
1265  }
1266 }
1267 
// Sends one lease update command to a single peer over HTTP and installs
// the handler that processes the peer's response.
//
// query        DHCP query that triggered the update; held by the handler
//              only through a weak pointer.
// config       configuration of the peer the command is sent to.
// command      JSON command placed in the body of the POST request.
// parking_lot  parking lot used to drop the packet on failure and passed
//              to leaseUpdateComplete().
//
// After scheduling the request, the pending request counter of the query is
// increased unless the peer is a backup whose acknowledgment is not awaited.
//
// NOTE(review): the embedded listing numbers skip 1317, 1337, 1383 and 1386.
// The statements that begin both ".arg(...)" chains, the statement inside
// the final "if (leaseUpdateComplete(...))" and one argument following the
// lambda are not visible here - confirm against upstream.
1268 template<typename QueryPtrType>
1269 void
1270 HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1271  const HAConfig::PeerConfigPtr& config,
1272  const ConstElementPtr& command,
1273  const ParkingLotHandlePtr& parking_lot) {
1274  // Create HTTP/1.1 request including our command.
1275  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1276  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1277  HostHttpHeader(config->getUrl().getHostname()));
1278  config->addBasicAuthHttpHeader(request);
1279  request->setBodyAsJson(command);
1280  request->finalize();
1281 
1282  // Response object should also be created because the HTTP client needs
1283  // to know the type of the expected response.
1284  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1285 
1286  // When possible we prefer to pass weak pointers to the queries, rather
1287  // than shared pointers, to avoid memory leaks in case cross reference
1288  // between the pointers.
1289  boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1290 
1291  // Schedule asynchronous HTTP request.
1292  client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1293  request, response,
1294  [this, weak_query, parking_lot, config]
1295  (const boost::system::error_code& ec,
1296  const HttpResponsePtr& response,
1297  const std::string& error_str) {
1298  // Get the shared pointer of the query. The server should keep the
1299  // pointer to the query and then park it. Therefore, we don't really
1300  // expect it to be null. If it is null, something is really wrong.
1301  QueryPtrType query = weak_query.lock();
1302  if (!query) {
1303  isc_throw(Unexpected, "query is null while receiving response from"
1304  " HA peer. This is programmatic error");
1305  }
1306 
1307  // There are three possible groups of errors during the lease update.
1308  // One is the IO error causing issues in communication with the peer.
1309  // Another one is an HTTP parsing error. The last type of error is
1310  // when non-success error code is returned in the response carried
1311  // in the HTTP message or if the JSON response is otherwise broken.
1312 
1313  bool lease_update_success = true;
1314 
1315  // Handle first two groups of errors.
1316  if (ec || !error_str.empty()) {
1318  .arg(query->getLabel())
1319  .arg(config->getLogLabel())
1320  .arg(ec ? ec.message() : error_str);
1321 
1322  // Communication error, so let's drop parked packet. The DHCP
1323  // response will not be sent.
1324  lease_update_success = false;
1325 
1326  } else {
1327 
1328  // Handle third group of errors.
1329  try {
  // rcode is filled in by verifyAsyncResponse() but is not used here.
1330  int rcode = 0;
1331  auto args = verifyAsyncResponse(response, rcode);
1332  // In the v6 case the server may return a list of failed lease
1333  // updates and we should log them.
1334  logFailedLeaseUpdates(query, args);
1335 
1336  } catch (const std::exception& ex) {
1338  .arg(query->getLabel())
1339  .arg(config->getLogLabel())
1340  .arg(ex.what());
1341 
1342  // Error while doing an update. The DHCP response will not be sent.
1343  lease_update_success = false;
1344  }
1345  }
1346 
1347  // We don't care about the result of the lease update to the backup server.
1348  // It is a best effort update.
1349  if (config->getRole() != HAConfig::PeerConfig::BACKUP) {
1350  if (lease_update_success) {
1351  // If the lease update was successful and we have sent it to the server
1352  // to which we also send heartbeats (primary, secondary or standby) we
1353  // can assume that the server is online and we can defer next heartbeat.
1354  communication_state_->poke();
1355 
1356  } else {
1357  communication_state_->setPartnerState("unavailable");
1358  }
1359  }
1360 
1361  // It is possible to configure the server to not wait for a response from
1362  // the backup server before we unpark the packet and respond to the client.
1363  // Here we check if we're dealing with such situation.
1364  if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1365  // We're expecting a response from the backup server or it is not
1366  // a backup server and the lease update was unsuccessful. In such
1367  // case the DHCP exchange fails.
1368  if (!lease_update_success) {
1369  parking_lot->drop(query);
1370  }
1371  } else {
1372  // This was a response from the backup server and we're configured to
1373  // not wait for their acknowledgments, so there is nothing more to do.
1374  return;
1375  }
1376 
  // leaseUpdateComplete() is called even when the packet was dropped
  // above, so the request counter of the query is always updated.
1377  if (leaseUpdateComplete(query, parking_lot)) {
1378  // If we have finished sending the lease updates we need to run the
1379  // state machine until the state machine finds that additional events
1380  // are required, such as next heartbeat or a lease update. The runModel()
1381  // may transition to another state, schedule asynchronous tasks etc.
1382  // Then it returns control to the DHCP server.
1384  }
1385  },
1387  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1388  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1389  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1390  );
1391 
1392  // The number of pending requests is the number of requests for which we
1393  // expect an acknowledgment prior to responding to the DHCP clients. If
1394  // we're configured to wait for the acks from the backups or it is not
1395  // a backup increase the number of pending requests.
1396  if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1397  // Request scheduled, so update the request counters for the query.
1398  updatePendingRequest(query);
1399  }
1400 }
1401 
// Decides whether lease updates should be sent to the peer described by
// peer_config.
//
// Returns false when lease updates are disabled in the configuration, true
// for a backup peer, false when this server is itself a backup, and
// otherwise true only in the states listed in the switch below.
//
// NOTE(review): the embedded listing numbers skip 1403 and 1424, so the
// signature line and one more case label of the switch are not visible here.
1402 bool
1404  // Never send lease updates if they are administratively disabled.
1405  if (!config_->amSendingLeaseUpdates()) {
1406  return (false);
1407  }
1408 
1409  // Always send updates to the backup server.
1410  if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1411  return (true);
1412  }
1413 
1414  // Never send updates if this is a backup server.
1415  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1416  return (false);
1417  }
1418 
1419  // In other case, whether we send lease updates or not depends on our
1420  // state.
1421  switch (getCurrState()) {
1422  case HA_HOT_STANDBY_ST:
1423  case HA_LOAD_BALANCING_ST:
1425  return (true);
1426 
1427  default:
1428  ;
1429  }
1430 
1431  return (false);
1432 }
1433 
// Decides whether lease updates for the peer described by peer_config
// should be queued instead of being sent.
//
// Returns false when lease updates are disabled in the configuration or
// when the peer is a backup server.
//
// NOTE(review): the embedded listing numbers skip 1435 and 1444, so the
// signature line and the final return statement are not visible here. The
// caller's comment says queueing applies in the communication-recovery
// state - confirm against upstream.
1434 bool
1436  if (!config_->amSendingLeaseUpdates()) {
1437  return (false);
1438  }
1439 
1440  if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1441  return (false);
1442  }
1443 
1445 }
1446 
// Logs every lease that the peer reported as failed in the arguments of its
// response to a lease update.
//
// args is expected to be a map that may hold the lists
// "failed-deleted-leases" and "failed-leases". Each list entry that is a map
// is logged with its "type", "ip-address" and "error-message" values, or
// "(unknown)" for any of them that is missing or not a string.
//
// Nothing is logged when args is null or is not a map.
//
// NOTE(review): the embedded listing numbers skip 1448, so the line with the
// function name and the query parameter is not visible here.
1447 void
1449  const ConstElementPtr& args) const {
1450  // If there are no arguments, it means that the update was successful.
1451  if (!args || (args->getType() != Element::map)) {
1452  return;
1453  }
1454 
1455  // Instead of duplicating the code between the failed-deleted-leases and
1456  // failed-leases, let's just have one function that does it for both.
1457  auto log_proc = [](const PktPtr query, const ConstElementPtr& args,
1458  const std::string& param_name, const log::MessageID& mesid) {
1459 
1460  // Check if there are any failed leases.
1461  auto failed_leases = args->get(param_name);
1462 
1463  // The failed leases must be a list.
1464  if (failed_leases && (failed_leases->getType() == Element::list)) {
1465  // Go over the failed leases and log each of them.
  // NOTE(review): i is a signed int compared against size(); if size()
  // returns an unsigned type this is a signed/unsigned comparison.
1466  for (int i = 0; i < failed_leases->size(); ++i) {
1467  auto lease = failed_leases->get(i);
  // Entries that are not maps are silently skipped.
1468  if (lease->getType() == Element::map) {
1469 
1470  // ip-address
1471  auto ip_address = lease->get("ip-address");
1472 
1473  // lease type
1474  auto lease_type = lease->get("type");
1475 
1476  // error-message
1477  auto error_message = lease->get("error-message");
1478 
1479  LOG_INFO(ha_logger, mesid)
1480  .arg(query->getLabel())
1481  .arg(lease_type && (lease_type->getType() == Element::string) ?
1482  lease_type->stringValue() : "(unknown)")
1483  .arg(ip_address && (ip_address->getType() == Element::string) ?
1484  ip_address->stringValue() : "(unknown)")
1485  .arg(error_message && (error_message->getType() == Element::string) ?
1486  error_message->stringValue() : "(unknown)");
1487  }
1488  }
1489  }
1490  };
1491 
1492  // Process "failed-deleted-leases"
1493  log_proc(query, args, "failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1494 
1495  // Process "failed-leases".
1496  log_proc(query, args, "failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1497 }
1498 
// Builds a map describing the HA status of this server and, where
// applicable, of its partner.
//
// The returned map always has a "local" entry with "role", "state" and
// "scopes". A "remote" entry, based on communication_state_->getReport()
// with a "role" added, is included unless the HA mode is PASSIVE_BACKUP or
// this server is a backup.
//
// NOTE(review): the embedded listing numbers skip 1499, 1500 and 1505, so
// the return type, the signature and the declaration of the local variable
// "role" are not visible here.
1501  ElementPtr ha_servers = Element::createMap();
1502 
1503  // Local part
1504  ElementPtr local = Element::createMap();
1506  role = config_->getThisServerConfig()->getRole();
1507  std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1508  local->set("role", Element::create(role_txt));
1509  int state = getCurrState();
1510  try {
1511  local->set("state", Element::create(stateToString(state)));
1512 
1513  } catch (...) {
1514  // Empty string on error.
1515  local->set("state", Element::create(std::string()));
1516  }
1517  std::set<std::string> scopes = query_filter_.getServedScopes();
1518  ElementPtr list = Element::createList();
1519  for (std::string scope : scopes) {
1520  list->add(Element::create(scope));
1521  }
1522  local->set("scopes", list);
1523  ha_servers->set("local", local);
1524 
1525  // Do not include remote server information if this is a backup server or
1526  // we're in the passive-backup mode.
1527  if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1528  (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1529  return (ha_servers);
1530  }
1531 
1532  // Remote part
1533  ElementPtr remote = communication_state_->getReport();
1534 
1535  try {
1536  role = config_->getFailoverPeerConfig()->getRole();
1537  std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1538  remote->set("role", Element::create(role_txt));
1539 
1540  } catch (...) {
  // Empty string when the partner's role cannot be determined.
1541  remote->set("role", Element::create(std::string()));
1542  }
1543  ha_servers->set("remote", remote);
1544 
1545  return (ha_servers);
1546 }
1547 
// Builds the successful answer carrying this server's state label, the
// current date and time in RFC 1123 format and the list of served scopes.
//
// The answer text is "HA peer status returned." and the arguments map holds
// the keys "state", "date-time" and "scopes".
//
// NOTE(review): the embedded listing numbers skip 1548 and 1549, so the
// return type and the signature are not visible here.
1550  ElementPtr arguments = Element::createMap();
1551  std::string state_label = getState(getCurrState())->getLabel();
1552  arguments->set("state", Element::create(state_label));
1553 
1554  std::string date_time = HttpDateTime().rfc1123Format();
1555  arguments->set("date-time", Element::create(date_time));
1556 
1557  auto scopes = query_filter_.getServedScopes();
1558  ElementPtr scopes_list = Element::createList();
1559  for (auto scope : scopes) {
1560  scopes_list->add(Element::create(scope));
1561  }
1562  arguments->set("scopes", scopes_list);
1563 
1564  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1565  arguments));
1566 }
1567 
// Resets the HA state machine and returns a successful answer.
//
// When the machine is already in HA_WAITING_ST nothing is done and the
// answer says so; otherwise the model is run with NOP_EVT.
//
// NOTE(review): the embedded listing numbers skip 1568, 1569 and 1573, so
// the return type, the signature and the statement preceding runModel() are
// not visible here - presumably the transition to the waiting state; confirm
// against upstream.
1570  if (getCurrState() == HA_WAITING_ST) {
1571  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1572  }
1574  runModel(NOP_EVT);
1575  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1576 }
1577 
// Sends a heartbeat command to the failover peer over HTTP and installs the
// handler that processes the response.
//
// On success the handler records the partner's state, time and scopes in
// communication_state_ and pokes it. On any failure it marks the partner
// "unavailable". In both cases it then calls startHeartbeat().
//
// NOTE(review): the embedded listing numbers skip 1579, 1613, 1662, 1680,
// 1691 and 1693. The signature line, the statements that begin the three
// ".arg(...)" chains, the statement following startHeartbeat() and one
// argument following the lambda are not visible here - confirm against
// upstream.
1578 void
1580  HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1581 
1582  // Create HTTP/1.1 request including our command.
1583  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1584  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1585  HostHttpHeader(partner_config->getUrl().getHostname()));
1586  partner_config->addBasicAuthHttpHeader(request);
1587  request->setBodyAsJson(CommandCreator::createHeartbeat(server_type_));
1588  request->finalize();
1589 
1590  // Response object should also be created because the HTTP client needs
1591  // to know the type of the expected response.
1592  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1593 
1594  // Schedule asynchronous HTTP request.
1595  client_->asyncSendRequest(partner_config->getUrl(),
1596  partner_config->getTlsContext(),
1597  request, response,
1598  [this, partner_config]
1599  (const boost::system::error_code& ec,
1600  const HttpResponsePtr& response,
1601  const std::string& error_str) {
1602 
1603  // There are three possible groups of errors during the heartbeat.
1604  // One is the IO error causing issues in communication with the peer.
1605  // Another one is an HTTP parsing error. The last type of error is
1606  // when non-success error code is returned in the response carried
1607  // in the HTTP message or if the JSON response is otherwise broken.
1608 
1609  bool heartbeat_success = true;
1610 
1611  // Handle first two groups of errors.
1612  if (ec || !error_str.empty()) {
1614  .arg(partner_config->getLogLabel())
1615  .arg(ec ? ec.message() : error_str);
1616  heartbeat_success = false;
1617 
1618  } else {
1619 
1620  // Handle third group of errors.
1621  try {
1622  // Response must contain arguments and the arguments must
1623  // be a map.
1624  int rcode = 0;
1625  ConstElementPtr args = verifyAsyncResponse(response, rcode);
1626  if (!args || args->getType() != Element::map) {
1627  isc_throw(CtrlChannelError, "returned arguments in the response"
1628  " must be a map");
1629  }
1630  // Response must include partner's state.
1631  ConstElementPtr state = args->get("state");
1632  if (!state || state->getType() != Element::string) {
1633  isc_throw(CtrlChannelError, "server state not returned in response"
1634  " to a ha-heartbeat command or it is not a string");
1635  }
1636  // Remember the partner's state. This may throw if the returned
1637  // state is invalid.
1638  communication_state_->setPartnerState(state->stringValue());
1639 
1640  ConstElementPtr date_time = args->get("date-time");
1641  if (!date_time || date_time->getType() != Element::string) {
1642  isc_throw(CtrlChannelError, "date-time not returned in response"
1643  " to a ha-heartbeat command or it is not a string");
1644  }
1645  // Note the time returned by the partner to calculate the clock skew.
1646  communication_state_->setPartnerTime(date_time->stringValue());
1647 
1648  // Remember the scopes served by the partner.
1649  try {
1650  auto scopes = args->get("scopes");
1651  communication_state_->setPartnerScopes(scopes);
1652 
1653  } catch (...) {
1654  // We don't want to fail if the scopes are missing because
1655  // this would be incompatible with old HA hook library
1656  // versions. We may make it mandatory one day, but during
1657  // upgrades of existing HA setup it would be a real issue
1658  // if we failed here.
1659  }
1660 
1661  } catch (const std::exception& ex) {
1663  .arg(partner_config->getLogLabel())
1664  .arg(ex.what());
1665  heartbeat_success = false;
1666  }
1667  }
1668 
1669  // If heartbeat was successful, let's mark the connection with the
1670  // peer as healthy.
1671  if (heartbeat_success) {
1672  communication_state_->poke();
1673 
1674  } else {
1675  // We were unable to retrieve partner's state, so let's mark it
1676  // as unavailable.
1677  communication_state_->setPartnerState("unavailable");
1678  // Log if the communication is interrupted.
1679  if (communication_state_->isCommunicationInterrupted()) {
1681  .arg(partner_config->getName());
1682  }
1683  }
1684 
1685  // Whatever the result of the heartbeat was, the state machine needs
1686  // to react to this. Let's run the state machine until the state machine
1687  // finds that some new events are required, i.e. next heartbeat or
1688  // lease update. The runModel() may transition to another state, schedule
1689  // asynchronous tasks etc. Then it returns control to the DHCP server.
1690  startHeartbeat();
1692  },
1694  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1695  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1696  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1697  );
1698 }
1699 
// Starts the heartbeat only when communication_state_ reports that it is
// not running yet, so an already scheduled heartbeat is left untouched.
//
// NOTE(review): the embedded listing numbers skip 1701, so the signature
// line is not visible here.
1700 void
1702  if (!communication_state_->isHeartbeatRunning()) {
1703  startHeartbeat();
1704  }
1705 }
1706 
// Asks communication_state_ to start the heartbeat with the configured
// delay, using HAService::asyncSendHeartbeat as the callback.
//
// Nothing is scheduled when the configured heartbeat delay is not greater
// than zero.
//
// NOTE(review): the embedded listing numbers skip 1708, so the signature
// line is not visible here.
1707 void
1709  if (config_->getHeartbeatDelay() > 0) {
1710  communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1711  std::bind(&HAService::asyncSendHeartbeat,
1712  this));
1713  }
1714 }
1715 
// Sends the command built by CommandCreator::createDHCPDisable() to the
// named peer over HTTP and reports the outcome through post_request_action.
//
// server_name          name of the peer, looked up with getPeerConfig().
// max_period           value passed to createDHCPDisable().
// post_request_action  optional callback invoked with a success flag and an
//                      error message (empty on success).
//
// On any failure the partner state is set to "unavailable".
//
// NOTE(review): the embedded listing numbers skip 1717, 1757, 1770 and 1788.
// The line with the function name and the http_client parameter, the
// statements that begin both ".arg(...)" chains and one argument following
// the lambda are not visible here - confirm against upstream.
1716 void
1718  const std::string& server_name,
1719  const unsigned int max_period,
1720  PostRequestCallback post_request_action) {
1721  HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1722 
1723  // Create HTTP/1.1 request including our command.
1724  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1725  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1726  HostHttpHeader(remote_config->getUrl().getHostname()));
1727 
1728  remote_config->addBasicAuthHttpHeader(request);
1729  request->setBodyAsJson(CommandCreator::createDHCPDisable(max_period,
1730  server_type_));
1731  request->finalize();
1732 
1733  // Response object should also be created because the HTTP client needs
1734  // to know the type of the expected response.
1735  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1736 
1737  // Schedule asynchronous HTTP request.
1738  http_client.asyncSendRequest(remote_config->getUrl(),
1739  remote_config->getTlsContext(),
1740  request, response,
1741  [this, remote_config, post_request_action]
1742  (const boost::system::error_code& ec,
1743  const HttpResponsePtr& response,
1744  const std::string& error_str) {
1745 
1746  // There are three possible groups of errors during the DHCP disable request.
1747  // One is the IO error causing issues in communication with the peer.
1748  // Another one is an HTTP parsing error. The last type of error is
1749  // when non-success error code is returned in the response carried
1750  // in the HTTP message or if the JSON response is otherwise broken.
1751 
  // Stays empty when the request succeeded.
1752  std::string error_message;
1753 
1754  // Handle first two groups of errors.
1755  if (ec || !error_str.empty()) {
1756  error_message = (ec ? ec.message() : error_str);
1758  .arg(remote_config->getLogLabel())
1759  .arg(error_message);
1760 
1761  } else {
1762 
1763  // Handle third group of errors.
1764  try {
  // Only the validation matters here; the returned arguments are ignored.
1765  int rcode = 0;
1766  static_cast<void>(verifyAsyncResponse(response, rcode));
1767 
1768  } catch (const std::exception& ex) {
1769  error_message = ex.what();
1771  .arg(remote_config->getLogLabel())
1772  .arg(error_message);
1773  }
1774  }
1775 
1776  // If there was an error communicating with the partner, mark the
1777  // partner as unavailable.
1778  if (!error_message.empty()) {
1779  communication_state_->setPartnerState("unavailable");
1780  }
1781 
1782  // Invoke post request action if it was specified.
1783  if (post_request_action) {
1784  post_request_action(error_message.empty(),
1785  error_message);
1786  }
1787  },
1789  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1790  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1791  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1792  );
1793 }
1794 
// Sends the command built by CommandCreator::createDHCPEnable() to the
// named peer over HTTP and reports the outcome through post_request_action.
//
// server_name          name of the peer, looked up with getPeerConfig().
// post_request_action  optional callback invoked with a success flag and an
//                      error message (empty on success).
//
// On any failure the partner state is set to "unavailable".
//
// NOTE(review): the embedded listing numbers skip 1796, 1833, 1846 and 1864.
// The line with the function name and the http_client parameter, the
// statements that begin both ".arg(...)" chains and one argument following
// the lambda are not visible here - confirm against upstream.
1795 void
1797  const std::string& server_name,
1798  PostRequestCallback post_request_action) {
1799  HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1800 
1801  // Create HTTP/1.1 request including our command.
1802  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1803  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1804  HostHttpHeader(remote_config->getUrl().getHostname()));
1805  remote_config->addBasicAuthHttpHeader(request);
1806  request->setBodyAsJson(CommandCreator::createDHCPEnable(server_type_));
1807  request->finalize();
1808 
1809  // Response object should also be created because the HTTP client needs
1810  // to know the type of the expected response.
1811  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1812 
1813  // Schedule asynchronous HTTP request.
1814  http_client.asyncSendRequest(remote_config->getUrl(),
1815  remote_config->getTlsContext(),
1816  request, response,
1817  [this, remote_config, post_request_action]
1818  (const boost::system::error_code& ec,
1819  const HttpResponsePtr& response,
1820  const std::string& error_str) {
1821 
1822  // There are three possible groups of errors during the DHCP enable request.
1823  // One is the IO error causing issues in communication with the peer.
1824  // Another one is an HTTP parsing error. The last type of error is
1825  // when non-success error code is returned in the response carried
1826  // in the HTTP message or if the JSON response is otherwise broken.
1827 
  // Stays empty when the request succeeded.
1828  std::string error_message;
1829 
1830  // Handle first two groups of errors.
1831  if (ec || !error_str.empty()) {
1832  error_message = (ec ? ec.message() : error_str);
1834  .arg(remote_config->getLogLabel())
1835  .arg(error_message);
1836 
1837  } else {
1838 
1839  // Handle third group of errors.
1840  try {
  // Only the validation matters here; the returned arguments are ignored.
1841  int rcode = 0;
1842  static_cast<void>(verifyAsyncResponse(response, rcode));
1843 
1844  } catch (const std::exception& ex) {
1845  error_message = ex.what();
1847  .arg(remote_config->getLogLabel())
1848  .arg(error_message);
1849  }
1850  }
1851 
1852  // If there was an error communicating with the partner, mark the
1853  // partner as unavailable.
1854  if (!error_message.empty()) {
1855  communication_state_->setPartnerState("unavailable");
1856  }
1857 
1858  // Invoke post request action if it was specified.
1859  if (post_request_action) {
1860  post_request_action(error_message.empty(),
1861  error_message);
1862  }
1863  },
1865  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1866  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1867  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1868  );
1869 }
1870 
// Disables the DHCP service of this server through network_state_, using
// NetworkState::Origin::HA_COMMAND as the origin of the request.
//
// NOTE(review): the embedded listing numbers skip 1872, so the signature
// line is not visible here.
1871 void
1873  network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1874 }
1875 
// Enables the DHCP service of this server through network_state_, using
// NetworkState::Origin::HA_COMMAND as the origin of the request.
//
// NOTE(review): the embedded listing numbers skip 1877, so the signature
// line is not visible here.
1876 void
1878  network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1879 }
1880 
// Starts lease synchronization with the failover peer using the service's
// own HTTP client (client_), no starting lease and no completion callback.
//
// The period passed on is the configured sync timeout converted from
// milliseconds to seconds, with a minimum of one second.
//
// NOTE(review): the embedded listing numbers skip 1882, so the signature
// line is not visible here.
1881 void
  // Default-constructed callback, i.e. no post-synchronization action.
1883  PostSyncCallback null_action;
1884 
1885  // Timeout is configured in milliseconds. Need to convert to seconds.
1886  unsigned int dhcp_disable_timeout =
1887  static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
1888  if (dhcp_disable_timeout == 0) {
1889  // Ensure that we always use at least 1 second timeout.
1890  dhcp_disable_timeout = 1;
1891  }
1892 
1893  asyncSyncLeases(*client_, config_->getFailoverPeerConfig()->getName(),
1894  dhcp_disable_timeout, LeasePtr(), null_action);
1895 }
1896 
// Disables the DHCP service on the named peer and, once that succeeds,
// starts fetching leases from it with asyncSyncLeasesInternal().
//
// server_name       name of the peer to synchronize with.
// max_period        value passed to asyncDisableDHCPService().
// last_lease        lease passed on as the starting point of the fetch.
// post_sync_action  callback invoked with (success, error message,
//                   dhcp_disabled) when disabling the DHCP service fails.
// dhcp_disabled     flag passed to post_sync_action on that failure path.
//
// NOTE(review): the embedded listing numbers skip 1898, so the line with the
// function name and the http_client parameter is not visible here.
// NOTE(review): the lambda captures http_client by reference, so the client
// has to outlive the asynchronous request - confirm for all callers.
1897 void
1899  const std::string& server_name,
1900  const unsigned int max_period,
1901  const dhcp::LeasePtr& last_lease,
1902  PostSyncCallback post_sync_action,
1903  const bool dhcp_disabled) {
1904  // Synchronization starts with a command to disable DHCP service of the
1905  // peer from which we're fetching leases. We don't want the other server
1906  // to allocate new leases while we fetch from it. The DHCP service will
1907  // be disabled for a certain amount of time and will be automatically
1908  // re-enabled if we die during the synchronization.
1909  asyncDisableDHCPService(http_client, server_name, max_period,
1910  [this, &http_client, server_name, max_period, last_lease,
1911  post_sync_action, dhcp_disabled]
1912  (const bool success, const std::string& error_message) {
1913 
1914  // If we have successfully disabled the DHCP service on the peer,
1915  // we can start fetching the leases.
1916  if (success) {
1917  // The last argument indicates that disabling the DHCP
1918  // service on the partner server was successful.
1919  asyncSyncLeasesInternal(http_client, server_name, max_period,
1920  last_lease, post_sync_action, true);
1921 
1922  } else {
  // NOTE(review): post_sync_action is called here without the
  // "if (post_sync_action)" check used at listing line 2110, while the
  // caller at listing line 1883 passes a default-constructed callback.
  // Presumably this throws if the callback is empty - confirm.
1923  post_sync_action(success, error_message, dhcp_disabled);
1924  }
1925  });
1926 }
1927 
// Fetches one page of leases from the partner over HTTP, stores the leases
// in the local lease database and requests the next page when needed.
//
// For each received lease: it is added when no such lease exists locally,
// it replaces the local lease when the local one has an older cltt_, and
// otherwise it is left alone. A failure on a single lease is caught inside
// the loop and does not abort processing of the remaining leases.
//
// When the page holds at least getSyncPageLimit() leases, the last lease of
// the page is remembered and asyncSyncLeases() is called again with it.
// Otherwise post_sync_action, if set, is invoked with (success, error
// message, dhcp_disabled).
//
// NOTE(review): the embedded listing numbers skip 1929, 1943, 1982, 1994,
// 2000, 2008, 2015, 2034, 2067, 2082 and 2090. The line with the function
// name and the http_client parameter, the conditions that open both
// "... } else {" v4/v6 branches and the statements that begin the
// ".arg(...)" chains and the two error messages are not visible here -
// confirm against upstream.
// NOTE(review): the request is addressed to getFailoverPeerConfig() while
// server_name is only used for logging and for the follow-up call - confirm
// that this is intended.
1928 void
1930  const std::string& server_name,
1931  const unsigned int max_period,
1932  const dhcp::LeasePtr& last_lease,
1933  PostSyncCallback post_sync_action,
1934  const bool dhcp_disabled) {
1935 
1936  HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1937 
1938  // Create HTTP/1.1 request including our command.
1939  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1940  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1941  HostHttpHeader(partner_config->getUrl().getHostname()));
1942  partner_config->addBasicAuthHttpHeader(request);
1944  request->setBodyAsJson(CommandCreator::createLease4GetPage(
1945  boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
1946 
1947  } else {
1948  request->setBodyAsJson(CommandCreator::createLease6GetPage(
1949  boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
1950  }
1951  request->finalize();
1952 
1953  // Response object should also be created because the HTTP client needs
1954  // to know the type of the expected response.
1955  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1956 
1957  // Schedule asynchronous HTTP request.
1958  http_client.asyncSendRequest(partner_config->getUrl(),
1959  partner_config->getTlsContext(),
1960  request, response,
1961  [this, partner_config, post_sync_action, &http_client, server_name,
1962  max_period, dhcp_disabled]
1963  (const boost::system::error_code& ec,
1964  const HttpResponsePtr& response,
1965  const std::string& error_str) {
1966 
1967  // Holds last lease received on the page of leases. If the last
1968  // page was hit, this value remains null.
1969  LeasePtr last_lease;
1970 
1971  // There are three possible groups of errors during the lease page request.
1972  // One is the IO error causing issues in communication with the peer.
1973  // Another one is an HTTP parsing error. The last type of error is
1974  // when non-success error code is returned in the response carried
1975  // in the HTTP message or if the JSON response is otherwise broken.
1976 
1977  std::string error_message;
1978 
1979  // Handle first two groups of errors.
1980  if (ec || !error_str.empty()) {
1981  error_message = (ec ? ec.message() : error_str);
1983  .arg(partner_config->getLogLabel())
1984  .arg(error_message);
1985 
1986  } else {
1987  // Handle third group of errors.
1988  try {
1989  int rcode = 0;
1990  ConstElementPtr args = verifyAsyncResponse(response, rcode);
1991 
1992  // Arguments must be a map.
1993  if (args && (args->getType() != Element::map)) {
1995  "arguments in the received response must be a map");
1996  }
1997 
  // NOTE(review): the check above only rejects non-map arguments. If
  // verifyAsyncResponse() can return a null pointer, the next line
  // dereferences it - confirm.
1998  ConstElementPtr leases = args->get("leases");
1999  if (!leases || (leases->getType() != Element::list)) {
2001  "server response does not contain leases argument or this"
2002  " argument is not a list");
2003  }
2004 
2005  // Iterate over the leases and update the database as appropriate.
2006  const auto& leases_element = leases->listValue();
2007 
2009  .arg(leases_element.size())
2010  .arg(server_name);
2011 
2012  for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
2013  try {
2014 
2016  Lease4Ptr lease = Lease4::fromElement(*l);
2017 
2018  // Check if there is such lease in the database already.
2019  Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2020  if (!existing_lease) {
2021  // There is no such lease, so let's add it.
2022  LeaseMgrFactory::instance().addLease(lease);
2023 
2024  } else if (existing_lease->cltt_ < lease->cltt_) {
2025  // If the existing lease is older than the fetched lease, update
2026  // the lease in our local database.
2027  // Update lease current expiration time with value received from the
2028  // database. Some database backends reject operations on the lease if
2029  // the current expiration time value does not match what is stored.
2030  Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2031  LeaseMgrFactory::instance().updateLease4(lease);
2032 
2033  } else {
2035  .arg(lease->addr_.toText())
2036  .arg(lease->subnet_id_);
2037  }
2038 
2039  // If we're not on the last page and we're processing final lease on
2040  // this page, let's record the lease as input to the next
2041  // lease4-get-page command.
2042  if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2043  (l + 1 == leases_element.end())) {
2044  last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2045  }
2046 
2047  } else {
2048  Lease6Ptr lease = Lease6::fromElement(*l);
2049 
2050  // Check if there is such lease in the database already.
2051  Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2052  lease->addr_);
2053  if (!existing_lease) {
2054  // There is no such lease, so let's add it.
2055  LeaseMgrFactory::instance().addLease(lease);
2056 
2057  } else if (existing_lease->cltt_ < lease->cltt_) {
2058  // If the existing lease is older than the fetched lease, update
2059  // the lease in our local database.
2060  // Update lease current expiration time with value received from the
2061  // database. Some database backends reject operations on the lease if
2062  // the current expiration time value does not match what is stored.
2063  Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2064  LeaseMgrFactory::instance().updateLease6(lease);
2065 
2066  } else {
2068  .arg(lease->addr_.toText())
2069  .arg(lease->subnet_id_);
2070  }
2071 
2072  // If we're not on the last page and we're processing final lease on
2073  // this page, let's record the lease as input to the next
2074  // lease6-get-page command.
2075  if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2076  (l + 1 == leases_element.end())) {
2077  last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2078  }
2079  }
2080 
  // A failure on a single lease does not set error_message, so the
  // remaining leases on the page are still processed.
2081  } catch (const std::exception& ex) {
2083  .arg((*l)->str())
2084  .arg(ex.what());
2085  }
2086  }
2087 
2088  } catch (const std::exception& ex) {
2089  error_message = ex.what();
2091  .arg(partner_config->getLogLabel())
2092  .arg(error_message);
2093  }
2094  }
2095 
2096  // If there was an error communicating with the partner, mark the
2097  // partner as unavailable.
2098  if (!error_message.empty()) {
2099  communication_state_->setPartnerState("unavailable");
2100 
2101  } else if (last_lease) {
2102  // This indicates that there are more leases to be fetched.
2103  // Therefore, we have to send another leaseX-get-page command.
2104  asyncSyncLeases(http_client, server_name, max_period, last_lease,
2105  post_sync_action, dhcp_disabled);
2106  return;
2107  }
2108 
2109  // Invoke post synchronization action if it was specified.
2110  if (post_sync_action) {
2111  post_sync_action(error_message.empty(),
2112  error_message,
2113  dhcp_disabled);
2114  }
2115  },
2116  HttpClient::RequestTimeout(config_->getSyncTimeout()),
2117  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2118  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2119  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2120  );
2121 
2122 }
2123 
// Runs a blocking lease database synchronization with the named server and
// wraps the outcome in a command answer: the status code returned by
// synchronize() becomes the result and the message it filled in becomes
// the answer text.
// NOTE(review): the line carrying the return type (listing line 2124) is
// missing from this extract; the function returns whatever createAnswer()
// yields.
2125 HAService::processSynchronize(const std::string& server_name,
2126  const unsigned int max_period) {
 // Starts out empty; synchronize() relies on that to detect failures.
2127  std::string answer_message;
2128  int sync_status = synchronize(answer_message, server_name, max_period);
2129  return (createAnswer(sync_status, answer_message));
2130 }
2131 
// Synchronously synchronizes the local lease database with the named server.
//
// The asynchronous fetch (asyncSyncLeases) is driven by an IOService and an
// HttpClient that are local to this call; io_service.run() does not return
// until one of the completion callbacks calls io_service.stop().
//
// status_message: receives the error text on failure, or a fixed success
//   text. Failure is detected as "status_message is non-empty", so the
//   caller is expected to pass an empty string (processSynchronize does).
// server_name, max_period: forwarded unchanged to asyncSyncLeases.
// Returns CONTROL_RESULT_ERROR when an error message was recorded, otherwise
// CONTROL_RESULT_SUCCESS.
2132 int
2133 HAService::synchronize(std::string& status_message, const std::string& server_name,
2134  const unsigned int max_period) {
2135  IOService io_service;
2136  HttpClient client(io_service);
2137 
 // Both lambdas below capture locals by reference. They are invoked from
 // io_service.run() further down, i.e. while this stack frame is alive.
2138  asyncSyncLeases(client, server_name, max_period, Lease4Ptr(),
2139  [&](const bool success, const std::string& error_message,
2140  const bool dhcp_disabled) {
2141  // If there was a fatal error while fetching the leases, let's
2142  // log an error message so as it can be included in the response
2143  // to the controlling client.
2144  if (!success) {
2145  status_message = error_message;
2146  }
2147 
2148  // Whether or not there was an error while fetching the leases,
2149  // we need to re-enable the DHCP service on the peer if the
2150  // DHCP service was disabled in the course of synchronization.
2151  if (dhcp_disabled) {
2152  asyncEnableDHCPService(client, server_name,
2153  [&](const bool success,
2154  const std::string& error_message) {
2155  // It is possible that we have already recorded an error
2156  // message while synchronizing the lease database. Don't
2157  // override the existing error message.
2158  if (!success && status_message.empty()) {
2159  status_message = error_message;
2160  }
2161 
2162  // The synchronization process is completed, so let's break
2163  // the IO service so as we can return the response to the
2164  // controlling client.
2165  io_service.stop();
2166  });
2167 
2168  } else {
2169  // Also stop IO service if there is no need to enable DHCP
2170  // service.
2171  io_service.stop();
2172  }
2173  });
2174 
2175  LOG_INFO(ha_logger, HA_SYNC_START).arg(server_name);
2176 
2177  // Measure duration of the synchronization.
2178  Stopwatch stopwatch;
2179 
2180  // Run the IO service until it is stopped by any of the callbacks. This
2181  // makes it synchronous.
2182  io_service.run();
2183 
2184  // End measuring duration.
2185  stopwatch.stop();
2186 
2187  // If an error message has been recorded, return an error to the controlling
2188  // client.
2189  if (!status_message.empty()) {
2191 
 // NOTE(review): listing lines 2190 and 2192 are missing from this extract.
 // The .arg() calls below are the tail of a statement whose head is not
 // shown (presumably a LOG_* call like the one above) - confirm.
2193  .arg(server_name)
2194  .arg(status_message);
2195 
2196  return (CONTROL_RESULT_ERROR);
2197 
2198  }
2199 
2200  // Everything was fine, so let's return a success.
2201  status_message = "Lease database synchronization complete.";
2203 
 // NOTE(review): listing lines 2202 and 2204 are missing from this extract;
 // the head of the statement receiving these .arg() calls is not shown.
2205  .arg(server_name)
2206  .arg(stopwatch.logFormatLastDuration());
2207 
2208  return (CONTROL_RESULT_SUCCESS);
2209 }
2210 
// Sends the lease updates held in lease_update_backlog_ to the given peer.
//
// Each call builds one command, posts it over HTTP and, when the response is
// accepted, calls itself again from the completion callback until the
// backlog is drained. post_request_action is invoked exactly once at the
// end of the chain: with (true, "") when the backlog is empty, or with
// (false, <error text>) on the first failure.
//
// NOTE(review): the line with the function name and first parameter
// (listing line 2212) is missing from this extract. The name is taken from
// the recursive call at the bottom, whose first argument is http_client.
2211 void
2213  const HAConfig::PeerConfigPtr& config,
2214  PostRequestCallback post_request_action) {
 // Nothing left to send: report success and end the recursion.
2215  if (lease_update_backlog_.size() == 0) {
2216  post_request_action(true, "");
2217  return;
2218  }
2219 
2220  ConstElementPtr command;
 // NOTE(review): listing lines 2221-2222 are missing from this extract. They
 // must declare op_type and open the branch closed by "} else {" below.
 // No null check on the cast result is visible before it is dereferenced -
 // confirm the missing condition guarantees a Lease4.
2223  Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2224  if (op_type == LeaseUpdateBacklog::ADD) {
2225  command = CommandCreator::createLease4Update(*lease);
2226  } else {
2227  command = CommandCreator::createLease4Delete(*lease);
2228  }
2229 
2230  } else {
 // NOTE(review): listing line 2231 (the body of this branch, which must
 // assign `command`) is missing from this extract.
2232  }
2233 
2234  // Create HTTP/1.1 request including our command.
2235  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2236  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2237  HostHttpHeader(config->getUrl().getHostname()));
2238  config->addBasicAuthHttpHeader(request);
2239  request->setBodyAsJson(command);
2240  request->finalize();
2241 
2242  // Response object should also be created because the HTTP client needs
2243  // to know the type of the expected response.
2244  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2245 
 // http_client is captured by reference: the caller must keep the client
 // alive until post_request_action has been invoked.
2246  http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2247  request, response,
2248  [this, &http_client, config, post_request_action]
2249  (const boost::system::error_code& ec,
2250  const HttpResponsePtr& response,
2251  const std::string& error_str) {
2252 
2253  std::string error_message;
2254 
 // Transport-level error (ec) or an error reported as text (error_str).
2255  if (ec || !error_str.empty()) {
2256  error_message = (ec ? ec.message() : error_str);
 // NOTE(review): listing line 2257 (head of the statement these .arg()
 // calls belong to) is missing from this extract.
2258  .arg(config->getLogLabel())
2259  .arg(ec ? ec.message() : error_str);
2260 
2261  } else {
2262  // Handle third group of errors.
2263  try {
 // The returned arguments are not used; only a throw matters here.
2264  int rcode = 0;
2265  auto args = verifyAsyncResponse(response, rcode);
2266  } catch (const std::exception& ex) {
2267  error_message = ex.what();
 // NOTE(review): listing line 2268 (head of this statement) is missing.
2269  .arg(config->getLogLabel())
2270  .arg(ex.what());
2271  }
2272  }
2273 
2274  // Recursively send all outstanding lease updates or break when an
2275  // error occurs. In DHCPv6, this is a single iteration because we use
2276  // lease6-bulk-apply, which combines many lease updates in a single
2277  // transaction. In the case of DHCPv4, each update is sent in its own
2278  // transaction.
2279  if (error_message.empty()) {
2280  asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2281  } else {
 // error_message is non-empty here, so the first argument is false.
2282  post_request_action(error_message.empty(), error_message);
2283  }
2284  });
2285 }
2286 
// Blocking wrapper around asyncSendLeaseUpdatesFromBacklog(): sends the
// queued lease updates to the failover peer using an IOService and an
// HttpClient local to this call.
// Returns true when the backlog was empty or the completion callback
// reported success, false otherwise.
// NOTE(review): the signature line (listing line 2288) is missing from this
// extract; the name used for this block is presumed - confirm.
2287 bool
2289  auto num_updates = lease_update_backlog_.size();
2290  if (num_updates == 0) {
 // NOTE(review): listing line 2291 is missing from this extract.
2292  return (true);
2293  }
2294 
2295  IOService io_service;
2296  HttpClient client(io_service);
2297  auto remote_config = config_->getFailoverPeerConfig();
2298  bool updates_successful = true;
2299 
 // NOTE(review): listing line 2300 (head of the statement these .arg() calls
 // belong to) is missing from this extract.
2301  .arg(num_updates)
2302  .arg(remote_config->getName());
2303 
 // The callback captures locals by reference; it is invoked from
 // io_service.run() below, while this frame is still alive.
2304  asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2305  [&](const bool success, const std::string&) {
2306  io_service.stop();
2307  updates_successful = success;
2308  });
2309 
2310  // Measure duration of the updates.
2311  Stopwatch stopwatch;
2312 
2313  // Run the IO service until it is stopped by the callback. This makes it synchronous.
2314  io_service.run();
2315 
2316  // End measuring duration.
2317  stopwatch.stop();
2318 
2319  if (updates_successful) {
 // NOTE(review): listing line 2320 (head of this statement) is missing.
2321  .arg(remote_config->getName())
2322  .arg(stopwatch.logFormatLastDuration());
2323  }
2324 
2325  return (updates_successful);
2326 }
2327 
// Posts a single command to the given peer over HTTP and reports the outcome
// through post_request_action(success, error_text). success is true only
// when neither the transport, the HTTP layer, nor verifyAsyncResponse()
// reported a problem.
// NOTE(review): listing lines 2329 (function name and first parameter) and
// 2332 (construction of `command`) are missing from this extract. The name
// is taken from the call in the blocking wrapper that follows, which passes
// an HttpClient as the first argument.
2328 void
2330  const HAConfig::PeerConfigPtr& config,
2331  PostRequestCallback post_request_action) {
2333 
2334  // Create HTTP/1.1 request including our command.
2335  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2336  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2337  HostHttpHeader(config->getUrl().getHostname()));
2338  config->addBasicAuthHttpHeader(request);
2339  request->setBodyAsJson(command);
2340  request->finalize();
2341 
2342  // Response object should also be created because the HTTP client needs
2343  // to know the type of the expected response.
2344  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2345 
2346  http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2347  request, response,
2348  [this, config, post_request_action]
2349  (const boost::system::error_code& ec,
2350  const HttpResponsePtr& response,
2351  const std::string& error_str) {
2352 
2353  std::string error_message;
2354 
 // Transport-level error (ec) or an error reported as text (error_str).
2355  if (ec || !error_str.empty()) {
2356  error_message = (ec ? ec.message() : error_str);
 // NOTE(review): listing line 2357 (head of the statement these .arg()
 // calls belong to) is missing from this extract.
2358  .arg(config->getLogLabel())
2359  .arg(ec ? ec.message() : error_str);
2360 
2361  } else {
2362  // Handle third group of errors.
2363  try {
 // The returned arguments are not used; only a throw matters here.
2364  int rcode = 0;
2365  auto args = verifyAsyncResponse(response, rcode);
2366  } catch (const std::exception& ex) {
2367  error_message = ex.what();
 // NOTE(review): listing line 2368 (head of this statement) is missing.
2369  .arg(config->getLogLabel())
2370  .arg(ex.what());
2371  }
2372  }
2373 
 // An empty error_message means every check above passed.
2374  post_request_action(error_message.empty(), error_message);
2375  });
2376 }
2377 
// Blocking wrapper around asyncSendHAReset(): sends the command to the
// failover peer using an IOService and an HttpClient local to this call and
// returns the success flag reported by the completion callback.
// NOTE(review): the signature line (listing line 2379) is missing from this
// extract; the name used for this block is presumed - confirm.
2378 bool
2380  IOService io_service;
2381  HttpClient client(io_service);
2382  auto remote_config = config_->getFailoverPeerConfig();
 // Stays true only if the callback reports success.
2383  bool reset_successful = true;
2384 
 // The callback captures locals by reference; it is invoked from
 // io_service.run() below, while this frame is still alive.
2385  asyncSendHAReset(client, remote_config,
2386  [&](const bool success, const std::string&) {
2387  io_service.stop();
2388  reset_successful = success;
2389  });
2390 
2391  // Run the IO service until it is stopped by the callback. This makes it synchronous.
2392  io_service.run();
2393 
2394  return (reset_successful);
2395 }
2396 
// Replaces the set of scopes served by this server with `scopes` and returns
// a command answer: CONTROL_RESULT_SUCCESS with a fixed text, or
// CONTROL_RESULT_ERROR carrying the exception message if anything inside the
// try block throws.
// NOTE(review): the return-type line (listing line 2397) and listing line
// 2401 (a statement inside the try block) are missing from this extract.
2398 HAService::processScopes(const std::vector<std::string>& scopes) {
2399  try {
2400  query_filter_.serveScopes(scopes);
2402 
2403  } catch (const std::exception& ex) {
2404  return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2405  }
2406 
2407  return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2408 }
2409 
// Body of a command handler that resumes the HA state machine via unpause().
// Both outcomes are reported as CONTROL_RESULT_SUCCESS; only the text says
// whether the machine was actually paused.
// NOTE(review): the signature (listing lines 2410-2411) is missing from this
// extract; the name used for this block is presumed - confirm.
2412  if (unpause()) {
2413  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2414  }
2415  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2416 }
2417 
// Body of a command handler that moves this server into, or (when `cancel`
// is set) out of, the in-maintenance state and returns a command answer.
// NOTE(review): the signature (listing lines 2418-2419) and several
// statements are missing from this extract; the gaps are marked below and
// the name used for this block is presumed - confirm.
2420  if (cancel) {
 // NOTE(review): listing line 2421 is missing; judging by the error text it
 // tests that the server is currently in the in-maintenance state.
2422  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2423  " maintenance for the server not in the"
2424  " in-maintenance state."));
2425  }
2426 
 // NOTE(review): listing lines 2427-2428 are missing from this extract.
2429  runModel(NOP_EVT);
2430  return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled."));
2431  }
2432 
2433  switch (getCurrState()) {
2434  case HA_BACKUP_ST:
 // NOTE(review): listing line 2435 (presumably another case label) is missing.
2436  case HA_TERMINATED_ST:
2437  // The reason why we don't return an error result here is that we have to
2438  // have a way to distinguish between the errors caused by the communication
2439  // issues and the cases when there is no communication error but the server
2440  // is not allowed to enter the in-maintenance state. In the former case, the
2441  // partner would go to partner-down. In the case signaled by the special
2442  // result code entering the maintenance state is not allowed.
 // NOTE(review): listing line 2443 (head of the return statement, carrying
 // the special result code) is missing from this extract.
2444  "Unable to transition the server from the "
2445  + stateToString(getCurrState()) + " to"
2446  " in-maintenance state."));
2447  default:
 // NOTE(review): listing lines 2448-2449 (the default branch) are missing.
2450  }
2451  return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2452 }
2453 
// Body of a command handler that asks the failover peer to enter the
// in-maintenance state (ha-maintenance-notify with cancel=false) and then
// moves this server to a matching state depending on the peer's reply.
// The HTTP exchange runs on an IOService local to this call, so the handler
// does not return until the request callback has fired.
// NOTE(review): the signature (listing lines 2454-2455) and several
// statements are missing from this extract; the gaps are marked below and
// the name used for this block is presumed - confirm.
 // States from which the transition is refused outright.
2456  switch (getCurrState()) {
2457  case HA_BACKUP_ST:
2458  case HA_IN_MAINTENANCE_ST:
 // NOTE(review): listing line 2459 (presumably another case label) is missing.
2460  case HA_TERMINATED_ST:
2461  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2462  " the " + stateToString(getCurrState()) + " to"
2463  " partner-in-maintenance state."));
2464  default:
2465  ;
2466  }
2467 
2468  HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2469 
2470  // Create HTTP/1.1 request including ha-maintenance-notify command
2471  // with the cancel flag set to false.
2472  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2473  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2474  HostHttpHeader(remote_config->getUrl().getHostname()));
2475  remote_config->addBasicAuthHttpHeader(request);
2476  request->setBodyAsJson(CommandCreator::createMaintenanceNotify(false, server_type_));
2477  request->finalize();
2478 
2479  // Response object should also be created because the HTTP client needs
2480  // to know the type of the expected response.
2481  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2482 
2483  IOService io_service;
2484  HttpClient client(io_service);
2485 
 // Results handed from the request callback back to this frame.
2486  boost::system::error_code captured_ec;
2487  std::string captured_error_message;
2488  int captured_rcode = 0;
2489 
2490  // Schedule asynchronous HTTP request.
2491  client.asyncSendRequest(remote_config->getUrl(),
2492  remote_config->getTlsContext(),
2493  request, response,
2494  [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2495  &captured_rcode]
2496  (const boost::system::error_code& ec,
2497  const HttpResponsePtr& response,
2498  const std::string& error_str) {
2499 
2500  io_service.stop();
2501 
2502  // There are three possible groups of errors. One is the IO error
2503  // causing issues in communication with the peer. Another one is
2504  // an HTTP parsing error. The last type of error is when non-success
2505  // error code is returned in the response carried in the HTTP message
2506  // or if the JSON response is otherwise broken.
2507 
2508  std::string error_message;
2509 
2510  // Handle first two groups of errors.
2511  if (ec || !error_str.empty()) {
2512  error_message = (ec ? ec.message() : error_str);
 // NOTE(review): listing line 2513 (head of the statement these .arg()
 // calls belong to) is missing from this extract.
2514  .arg(remote_config->getLogLabel())
2515  .arg(error_message);
2516 
2517  } else {
2518 
2519  // Handle third group of errors.
2520  try {
2521  static_cast<void>(verifyAsyncResponse(response, captured_rcode));
2522 
2523  } catch (const std::exception& ex) {
2524  error_message = ex.what();
 // NOTE(review): listing line 2525 (head of this statement) is missing.
2526  .arg(remote_config->getLogLabel())
2527  .arg(error_message);
2528  }
2529  }
2530 
2531  // If there was an error communicating with the partner, mark the
2532  // partner as unavailable.
2533  if (!error_message.empty()) {
2534  communication_state_->setPartnerState("unavailable");
2535  }
2536 
 // NOTE(review): when error_str is non-empty but ec is not set, captured_ec
 // stays clear and captured_rcode keeps its initial value of 0, because
 // verifyAsyncResponse() is never reached. Whether the branches after
 // io_service.run() then treat that failure as success depends on the
 // numeric value of CONTROL_RESULT_SUCCESS, which is not visible here -
 // confirm.
2537  captured_ec = ec;
2538  captured_error_message = error_message;
2539  },
 // NOTE(review): listing line 2540 (an argument between the request callback
 // and the connect handler) is missing from this extract.
2541  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2542  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2543  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2544  );
2545 
2546  // Run the IO service until it is stopped by any of the callbacks. This
2547  // makes it synchronous.
2548  io_service.run();
2549 
2550  // If there was a communication problem with the partner we assume that
2551  // the partner is already down while we receive this command.
2552  if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
 // NOTE(review): listing lines 2553-2554 and 2556 (including the head of the
 // return statement below) are missing from this extract.
2555  runModel(NOP_EVT);
2557  "Server is now in the partner-down state as its"
2558  " partner appears to be offline for maintenance."));
2559 
2560  } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2561  // If the partner responded indicating no error it means that the
2562  // partner has been transitioned to the in-maintenance state. In that
2563  // case we transition to the partner-in-maintenance state.
 // NOTE(review): listing lines 2564-2565 are missing from this extract.
2566  runModel(NOP_EVT);
2567 
2568  } else {
2569  // Partner server returned a special status code which means that it can't
2570  // transition to the partner-in-maintenance state.
2571  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2572  " partner-in-maintenance state. The partner server responded"
2573  " with the following message to the ha-maintenance-notify"
2574  " command: " + captured_error_message + "."));
2575 
2576  }
2577 
 // NOTE(review): listing line 2578 (head of the final return statement) is
 // missing from this extract.
2579  "Server is now in the partner-in-maintenance state"
2580  " and its partner is in-maintenance state. The partner"
2581  " can be now safely shut down."));
2582 }
2583 
// Body of the ha-maintenance-cancel handler: asks the failover peer to leave
// the in-maintenance state (ha-maintenance-notify with cancel=true) and, if
// the peer accepted, reverts this server's own state as well.
// The HTTP exchange runs on an IOService local to this call, so the handler
// does not return until the request callback has fired.
// NOTE(review): the signature and the guard condition (listing lines
// 2584-2586) plus several statements are missing from this extract; the
// gaps are marked below.
 // Tail of the missing guard: per the error text it rejects the command when
 // the server is not in the partner-in-maintenance state.
2587  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2588  " request because the server is not in the"
2589  " partner-in-maintenance state."));
2590  }
2591 
2592  HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2593 
2594  // Create HTTP/1.1 request including ha-maintenance-notify command
2595  // with the cancel flag set to true.
2596  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2597  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2598  HostHttpHeader(remote_config->getUrl().getHostname()));
2599  remote_config->addBasicAuthHttpHeader(request);
2600  request->setBodyAsJson(CommandCreator::createMaintenanceNotify(true, server_type_));
2601  request->finalize();
2602 
2603  // Response object should also be created because the HTTP client needs
2604  // to know the type of the expected response.
2605  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2606 
2607  IOService io_service;
2608  HttpClient client(io_service);
2609 
 // Filled in by the request callback; empty means the peer accepted.
2610  std::string error_message;
2611 
2612  // Schedule asynchronous HTTP request.
2613  client.asyncSendRequest(remote_config->getUrl(),
2614  remote_config->getTlsContext(),
2615  request, response,
2616  [this, remote_config, &io_service, &error_message]
2617  (const boost::system::error_code& ec,
2618  const HttpResponsePtr& response,
2619  const std::string& error_str) {
2620 
2621  io_service.stop();
2622 
2623  // Handle first two groups of errors.
2624  if (ec || !error_str.empty()) {
2625  error_message = (ec ? ec.message() : error_str);
 // NOTE(review): listing line 2626 (head of the statement these .arg()
 // calls belong to) is missing from this extract.
2627  .arg(remote_config->getLogLabel())
2628  .arg(error_message);
2629 
2630  } else {
2631 
2632  // Handle third group of errors.
2633  try {
 // Only a throw matters here; the result code itself is not inspected.
2634  int rcode = 0;
2635  static_cast<void>(verifyAsyncResponse(response, rcode));
2636 
2637  } catch (const std::exception& ex) {
2638  error_message = ex.what();
 // NOTE(review): listing line 2639 (head of this statement) is missing.
2640  .arg(remote_config->getLogLabel())
2641  .arg(error_message);
2642  }
2643  }
2644 
2645  // If there was an error communicating with the partner, mark the
2646  // partner as unavailable.
2647  if (!error_message.empty()) {
2648  communication_state_->setPartnerState("unavailable");
2649  }
2650  },
 // NOTE(review): listing line 2651 (an argument between the request callback
 // and the connect handler) is missing from this extract.
2652  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2653  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2654  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2655  );
2656 
2657  // Run the IO service until it is stopped by any of the callbacks. This
2658  // makes it synchronous.
2659  io_service.run();
2660 
2661  // There was an error in communication with the partner or the
2662  // partner was unable to revert its state.
2663  if (!error_message.empty()) {
 // NOTE(review): listing line 2664 (head of the return statement) is missing.
2665  "Unable to cancel maintenance. The partner server responded"
2666  " with the following message to the ha-maintenance-notify"
2667  " command: " + error_message + "."));
2668  }
2669 
2670  // Successfully reverted partner's state. Let's also revert our state to the
2671  // previous one.
 // NOTE(review): listing lines 2672-2673 are missing from this extract.
2674  runModel(NOP_EVT);
2675 
 // NOTE(review): listing line 2676 (head of the final return statement) is
 // missing from this extract.
2677  "Server maintenance successfully canceled."));
2678 }
2679 
// Validates the HTTP response received from a peer and extracts the command
// result from it.
//
// response: the response object handed to the HTTP client callback.
// rcode: output. Set to CONTROL_RESULT_ERROR on entry so that an early throw
//   leaves an error code behind, then overwritten with the code parsed from
//   the first answer in the body.
// Returns the value produced by parseAnswer() for the first answer.
// Throws CtrlChannelError when the response is not a JSON response, has no
// body, has a body that is neither a list nor a map, holds an empty list, or
// carries a result code other than CONTROL_RESULT_SUCCESS or
// CONTROL_RESULT_EMPTY.
// NOTE(review): the return-type line (listing line 2680) is missing from this
// extract.
2681 HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
2682  // Set the return code to error in case of early throw.
2683  rcode = CONTROL_RESULT_ERROR;
2684  // The response must cast to JSON type.
2685  HttpResponseJsonPtr json_response =
2686  boost::dynamic_pointer_cast<HttpResponseJson>(response);
2687  if (!json_response) {
2688  isc_throw(CtrlChannelError, "no valid HTTP response found");
2689  }
2690 
2691  // Body holds the response to our command.
2692  ConstElementPtr body = json_response->getBodyAsJson();
2693  if (!body) {
2694  isc_throw(CtrlChannelError, "no body found in the response");
2695  }
2696 
2697  // Body should contain a list of responses from multiple servers.
2698  if (body->getType() != Element::list) {
2699  // Some control agent errors are returned as a map.
2700  if (body->getType() == Element::map) {
 // Wrap the map in a one-element list so the code below can treat both
 // shapes alike. The synthesized answer carries the current rcode, which
 // is still CONTROL_RESULT_ERROR here, plus the original text if present.
2701  ElementPtr list = Element::createList();
2702  ElementPtr answer = Element::createMap();
2703  answer->set(CONTROL_RESULT, Element::create(rcode));
2704  ConstElementPtr text = body->get(CONTROL_TEXT);
2705  if (text) {
2706  answer->set(CONTROL_TEXT, text);
2707  }
2708  list->add(answer);
2709  body = list;
2710  } else {
2711  isc_throw(CtrlChannelError, "body of the response must be a list");
2712  }
2713  }
2714 
2715  // There must be at least one response.
2716  if (body->empty()) {
2717  isc_throw(CtrlChannelError, "list of responses must not be empty");
2718  }
2719 
2720  // Check the status code of the first response. We don't support multiple
2721  // at this time, because we always send a request to a single location.
2722  ConstElementPtr args = parseAnswer(rcode, body->get(0));
2723  if ((rcode != CONTROL_RESULT_SUCCESS) &&
2724  (rcode != CONTROL_RESULT_EMPTY)) {
2725  std::ostringstream s;
2726  // Include an error text if available.
2727  if (args && args->getType() == Element::string) {
2728  s << args->stringValue() << ", ";
2729  }
2730  // Include an error code.
2731  s << "error code " << rcode;
2732  isc_throw(CtrlChannelError, s.str());
2733  }
2734 
2735  return (args);
2736 }
2737 
2738 bool
2739 HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
2740 
2741  // If client is running it's own IOService we do NOT want to
2742  // register the socket with IfaceMgr.
2743  if (client_->getThreadIOService()) {
2744  return (true);
2745  }
2746 
2747  // If things look OK register the socket with Interface Manager. Note
2748  // we don't register if the FD is < 0 to avoid an exception throw.
2749  // It is unlikely that this will occur but we want to be liberal
2750  // and avoid issues.
2751  if ((!ec || (ec.value() == boost::asio::error::in_progress))
2752  && (tcp_native_fd >= 0)) {
2753  // External socket callback is a NOP. Ready events handlers are
2754  // run by an explicit call IOService ready in kea-dhcp<n> code.
2755  // We are registering the socket only to interrupt main-thread
2756  // select().
2757  IfaceMgr::instance().addExternalSocket(tcp_native_fd,
2758  std::bind(&HAService::socketReadyHandler, this, ph::_1)
2759  );
2760  }
2761 
2762  // If ec.value() == boost::asio::error::already_connected, we should already
2763  // be registered, so nothing to do. If it is any other value, then connect
2764  // failed and Connection logic should handle that, not us, so no matter
2765  // what happens we're returning true.
2766  return (true);
2767 }
2768 
2769 void
2770 HAService::socketReadyHandler(int tcp_native_fd) {
2771  // If the socket is ready but does not belong to one of our client's
2772  // ongoing transactions, we close it. This will unregister it from
2773  // IfaceMgr and ensure the client starts over with a fresh connection
2774  // if it needs to do so.
2775  client_->closeIfOutOfBand(tcp_native_fd);
2776 }
2777 
2778 void
2779 HAService::clientCloseHandler(int tcp_native_fd) {
2780  if (tcp_native_fd >= 0) {
2781  IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
2782  }
2783 };
2784 
// Returns the number of entries currently held in pending_requests_.
// The map is read under mutex_ only when multi-threading mode is reported as
// enabled; otherwise it is read without taking the lock.
// NOTE(review): the signature line (listing line 2786) is missing from this
// extract.
2785 size_t
2787  if (MultiThreadingMgr::instance().getMode()) {
2788  std::lock_guard<std::mutex> lock(mutex_);
2789  return (pending_requests_.size());
2790  } else {
2791  return (pending_requests_.size());
2792  }
2793 }
2794 
2795 template<typename QueryPtrType>
2796 int
2797 HAService::getPendingRequest(const QueryPtrType& query) {
2798  if (MultiThreadingMgr::instance().getMode()) {
2799  std::lock_guard<std::mutex> lock(mutex_);
2800  return (getPendingRequestInternal(query));
2801  } else {
2802  return (getPendingRequestInternal(query));
2803  }
2804 }
2805 
2806 template<typename QueryPtrType>
2807 int
2808 HAService::getPendingRequestInternal(const QueryPtrType& query) {
2809  if (pending_requests_.count(query) == 0) {
2810  return (0);
2811  } else {
2812  return (pending_requests_[query]);
2813  }
2814 }
2815 
// Registers the pause/resume handlers as critical-section callbacks under
// the "HA_MT" name, then starts the client and the listener when they exist.
// NOTE(review): the signature line (listing line 2817) is missing from this
// extract; the name used for this block is presumed - confirm.
2816 void
2818  // Add critical section callbacks.
2819  MultiThreadingMgr::instance().addCriticalSectionCallbacks("HA_MT",
2820  std::bind(&HAService::pauseClientAndListener, this),
2821  std::bind(&HAService::resumeClientAndListener, this));
2822 
 // Either pointer may be unset, so each is checked before use.
2823  if (client_) {
2824  client_->start();
2825  }
2826 
2827  if (listener_) {
2828  listener_->start();
2829  }
2830 }
2831 
// Pauses the client and the listener when they exist. Registered elsewhere
// in this file as a critical-section callback, so exceptions derived from
// std::exception are caught here instead of propagating.
// NOTE(review): the signature line (listing line 2833) is missing from this
// extract.
2832 void
2834  // Since we're used as CS callback we need to suppress
2835  // any exceptions, unlikely though they may be.
2836  try {
2837  if (client_) {
2838  client_->pause();
2839  }
2840 
2841  if (listener_) {
2842  listener_->pause();
2843  }
2844  } catch (std::exception& ex) {
 // NOTE(review): listing line 2845 (head of the statement this .arg() call
 // belongs to) is missing from this extract.
2846  .arg(ex.what());
2847  }
2848 }
2849 
// Resumes the client and the listener when they exist. Registered elsewhere
// in this file as a critical-section callback, so exceptions derived from
// std::exception are caught here instead of propagating.
// NOTE(review): the signature line (listing line 2851) is missing from this
// extract.
2850 void
2852  // Since we're used as CS callback we need to suppress
2853  // any exceptions, unlikely though they may be.
2854  try {
2855  if (client_) {
2856  client_->resume();
2857  }
2858 
2859  if (listener_) {
2860  listener_->resume();
2861  }
2862  } catch (std::exception& ex) {
 // NOTE(review): listing line 2863 (head of the statement this .arg() call
 // belongs to) is missing from this extract.
2864  .arg(ex.what());
2865  }
2866 }
2867 
// Removes the "HA_MT" critical-section callbacks, then stops the client and
// the listener when they exist.
// NOTE(review): the signature line (listing line 2869) is missing from this
// extract; the name used for this block is presumed - confirm.
2868 void
2870  // Remove critical section callbacks.
2871  MultiThreadingMgr::instance().removeCriticalSectionCallbacks("HA_MT");
2872 
 // Either pointer may be unset, so each is checked before use.
2873  if (client_) {
2874  client_->stop();
2875  }
2876 
2877  if (listener_) {
2878  listener_->stop();
2879  }
2880 }
2881 
2882 // Explicit instantiations.
// getPendingRequest() is a template defined in this source file, so the
// DHCPv4 and DHCPv6 query pointer versions are instantiated here explicitly.
2883 template int HAService::getPendingRequest(const Pkt4Ptr&);
2884 template int HAService::getPendingRequest(const Pkt6Ptr&);
2885 
2886 } // end of namespace isc::ha
2887 } // end of namespace isc
void defineState(unsigned int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds a state value and its associated label to the set of states.
Definition: state_model.cc:196
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
Definition: ha_service.h:1116
static const int NOP_EVT
Signifies that no event has occurred.
Definition: state_model.h:292
static data::ConstElementPtr createHAReset(const HAServerType &server_type)
Creates ha-reset command.
const isc::log::MessageID HA_SYNC_FAILED
Definition: ha_messages.h:99
const int HA_TERMINATED_ST
HA service terminated state.
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition: ha_service.h:68
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition: macros.h:26
virtual void defineStates()
Defines states of the HA service.
Definition: ha_service.cc:142
data::ConstElementPtr processStatusGet() const
Processes status-get command and returns a response.
Definition: ha_service.cc:1500
Represents HTTP Host header.
Definition: http_header.h:68
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED
Definition: ha_messages.h:80
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
Definition: ha_service.cc:960
data::ConstElementPtr processHAReset()
Processes ha-reset command and returns a response.
Definition: ha_service.cc:1569
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition: date_time.cc:30
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
void pauseClientAndListener()
Pauses client and/or listener thread pool operations.
Definition: ha_service.cc:2833
const isc::log::MessageID HA_STATE_TRANSITION
Definition: ha_messages.h:97
const int HA_HOT_STANDBY_ST
Hot standby state.
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result")
Structure that holds a lease for IPv4 address.
Definition: lease.h:294
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
Definition: ha_service.cc:1216
const isc::log::MessageID HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED
Definition: ha_messages.h:56
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition: ha_service.h:65
void defineEvent(unsigned int value, const std::string &label)
Adds an event value and associated label to the set of events.
Definition: state_model.cc:170
const int DBGLVL_TRACE_BASIC
Trace basic operations.
Definition: log_dbglevels.h:65
int getNormalState() const
Returns normal operation state for the current configuration.
Definition: ha_service.cc:923
bool doOnExit()
Checks if on exit flag is true.
Definition: state_model.cc:347
void readyStateHandler()
Handler for "ready" state.
Definition: ha_service.cc:561
void serveNoScopes()
Disables all scopes.
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition: macros.h:20
ConstElementPtr createAnswer(const int status_code, const std::string &text, const ConstElementPtr &arg)
const StatePtr getState(unsigned int value)
Fetches the state referred to by value.
Definition: state_model.cc:213
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
void asyncSendRequest(const Url &url, const asiolink::TlsContextPtr &tls_context, const HttpRequestPtr &request, const HttpResponsePtr &response, const RequestHandler &request_callback, const RequestTimeout &request_timeout=RequestTimeout(10000), const ConnectHandler &connect_callback=ConnectHandler(), const HandshakeHandler &handshake_callback=HandshakeHandler(), const CloseHandler &close_callback=CloseHandler())
Queues new asynchronous HTTP request for a given URL.
Definition: client.cc:1921
size_t pendingRequestSize()
Get the number of entries in the pending request map.
Definition: ha_service.cc:2786
data::ConstElementPtr processScopes(const std::vector< std::string > &scopes)
Processes ha-scopes command and returns a response.
Definition: ha_service.cc:2398
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
Definition: ha_service.cc:1701
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition: ha_config.h:232
void asyncEnableDHCPService(http::HttpClient &http_client, const std::string &server_name, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-enable" command to the specified server.
Definition: ha_service.cc:1796
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
Definition: response_json.h:24
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition: ha_messages.h:49
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition: ha_messages.h:68
bool unpause()
Unpauses the HA state machine with logging.
Definition: ha_service.cc:939
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition: ha_messages.h:58
void passiveBackupStateHandler()
Handler for "passive-backup" state.
Definition: ha_service.cc:543
const int HA_PARTNER_DOWN_ST
Partner down state.
std::string getStateLabel(const int state) const
Fetches the label associated with a state value.
Definition: state_model.cc:421
void asyncSyncLeases()
Asynchronously reads leases from a peer and updates local lease database.
Definition: ha_service.cc:1882
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition: ha_messages.h:71
data::ConstElementPtr processHeartbeat()
Processes ha-heartbeat command and returns a response.
Definition: ha_service.cc:1549
QueryFilter query_filter_
Selects queries to be processed/dropped.
Definition: ha_service.h:1122
An abstract API for lease database.
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
Definition: ha_service.h:1236
OpType
Type of the lease update (operation type).
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
Definition: ha_messages.h:85
const isc::log::MessageID HA_LEASES_SYNC_COMMUNICATIONS_FAILED
Definition: ha_messages.h:61
CommunicationStatePtr communication_state_
Holds communication state with a peer.
Definition: ha_service.h:1119
virtual void verifyEvents()
Verifies events used by the HA service.
Definition: ha_service.cc:129
const int HA_IN_MAINTENANCE_ST
In maintenance state.
HTTP request/response timeout value.
Definition: client.h:90
unsigned int getLastEvent() const
Fetches the model's last event.
Definition: state_model.cc:367
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition: ha_service.h:56
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
Definition: ha_service.cc:1435
bool clientConnectHandler(const boost::system::error_code &ec, int tcp_native_fd)
HttpClient connect callback handler.
Definition: ha_service.cc:2739
const int HA_LOAD_BALANCING_ST
Load balancing state.
HAServerType server_type_
DHCP server type.
Definition: ha_service.h:1109
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition: macros.h:32
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
boost::shared_ptr< Element > ElementPtr
Definition: data.h:20
const isc::log::MessageID HA_LEASE_SYNC_STALE_LEASE4_SKIP
Definition: ha_messages.h:65
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition: ha_service.h:59
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition: ha_messages.h:33
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition: ha_messages.h:81
const isc::log::MessageID HA_RESET_FAILED
Definition: ha_messages.h:90
std::set< std::string > getServedScopes() const
Returns served scopes.
data::ConstElementPtr processMaintenanceCancel()
Processes ha-maintenance-cancel command and returns a response.
Definition: ha_service.cc:2585
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED
Definition: ha_messages.h:20
static data::ConstElementPtr createLease6GetPage(const dhcp::Lease6Ptr &lease6, const uint32_t limit)
Creates lease6-get-page command.
void waitingStateHandler()
Handler for "waiting" state.
Definition: ha_service.cc:750
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition: ha_messages.h:60
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
Definition: ha_service.cc:1249
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
void partnerInMaintenanceStateHandler()
Handler for "partner-in-maintenance" state.
Definition: ha_service.cc:504
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
virtual ~HAService()
Destructor.
Definition: ha_service.cc:108
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
Definition: ha_service.cc:1067
unsigned int getCurrState() const
Fetches the model's current state.
Definition: state_model.cc:355
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition: ha_messages.h:67
const isc::log::MessageID HA_RESET_COMMUNICATIONS_FAILED
Definition: ha_messages.h:89
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition: lease.h:26
void serveDefaultScopes()
Serve default scopes for the given HA mode.
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
Definition: ha_service.h:1103
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition: ha_service.h:62
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:92
virtual void runModel(unsigned int event)
Processes events through the state model.
Definition: state_model.cc:112
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
Definition: ha_service.h:1100
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition: ha_messages.h:50
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
Definition: ha_service.cc:1072
data::ConstElementPtr processContinue()
Processes ha-continue command and returns a response.
Definition: ha_service.cc:2411
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
void stop()
Stops the stopwatch.
Definition: stopwatch.cc:35
Holds communication state between DHCPv4 servers.
void resumeClientAndListener()
Resumes client and/or listener thread pool operations.
Definition: ha_service.cc:2851
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:28
Definition: edns.h:19
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition: ha_service.h:47
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition: ha_messages.h:70
const isc::log::MessageID HA_DHCP_DISABLE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:39
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition: ha_service.h:50
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as a result of lease allocation or release.
Definition: ha_service.cc:1403
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition: lease.h:283
std::string logFormatLastDuration() const
Returns the last measured duration in the format directly usable in log messages. ...
Definition: stopwatch.cc:75
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
void partnerDownStateHandler()
Handler for "partner-down" state.
Definition: ha_service.cc:427
void syncingStateHandler()
Handler for "syncing" state.
Definition: ha_service.cc:640
void clear()
Removes all lease updates from the queue.
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition: response.h:78
Utility class to measure code execution times.
Definition: stopwatch.h:35
void asyncSendHeartbeat()
Starts asynchronous heartbeat to a peer.
Definition: ha_service.cc:1579
void serveScopes(const std::vector< std::string > &scopes)
Enables selected scopes.
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition: ha_messages.h:59
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition: lease.h:490
void inMaintenanceStateHandler()
Handler for the "in-maintenance" state.
Definition: ha_service.cc:402
void logFailedLeaseUpdates(const dhcp::PktPtr &query, const data::ConstElementPtr &args) const
Log failed lease updates.
Definition: ha_service.cc:1448
A generic exception that is thrown when an unexpected error condition occurs.
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
Definition: ha_service.cc:1270
bool doOnEntry()
Checks if on entry flag is true.
Definition: state_model.cc:339
const char * CONTROL_TEXT
String used for storing textual description ("text")
const isc::log::MessageID HA_LEASE_SYNC_FAILED
Definition: ha_messages.h:64
bool push(const OpType op_type, const dhcp::LeasePtr &lease)
Appends lease update to the queue.
const isc::log::MessageID HA_SERVICE_STARTED
Definition: ha_messages.h:94
std::string stateToString(int state)
Returns state name.
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition: ha_messages.h:76
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:544
boost::shared_ptr< const Element > ConstElementPtr
Definition: data.h:23
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
Definition: ha_messages.h:98
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition: ha_messages.h:44
data::ConstElementPtr processMaintenanceNotify(const bool cancel)
Processes ha-maintenance-notify command and returns a response.
Definition: ha_service.cc:2419
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition: ha_messages.h:40
size_t size()
Returns the current size of the queue.
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
Definition: ha_service.cc:965
void terminatedStateHandler()
Handler for "terminated" state.
Definition: ha_service.cc:729
bool wasOverflown()
Checks if the queue was overflown.
void stopClientAndListener()
Stop the client and/or listener instances.
Definition: ha_service.cc:2869
const isc::log::MessageID HA_LEASE_UPDATE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:69
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition: ha_messages.h:72
void serveFailoverScopes()
Enable scopes required in failover case.
void localDisableDHCPService()
Disables local DHCP service.
Definition: ha_service.cc:1872
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition: pkt.h:797
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
Definition: ha_service.cc:995
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition: ha_messages.h:51
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
const isc::log::MessageID HA_SYNC_START
Definition: ha_messages.h:101
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
void startModel(const int start_state)
Begins execution of the model.
Definition: state_model.cc:100
void unpauseModel()
Unpauses state model.
Definition: state_model.cc:276
Represents HTTP response with JSON content.
Definition: response_json.h:34
void asyncSyncLeasesInternal(http::HttpClient &http_client, const std::string &server_name, const unsigned int max_period, const dhcp::LeasePtr &last_lease, PostSyncCallback post_sync_action, const bool dhcp_disabled)
Implements fetching one page of leases during synchronization.
Definition: ha_service.cc:1929
std::function< void(const bool, const std::string &, const bool)> PostSyncCallback
Callback invoked when lease database synchronization is complete.
Definition: ha_service.h:86
HTTP client class.
Definition: client.h:87
void communicationRecoveryHandler()
Handler for the "communication-recovery" state.
Definition: ha_service.cc:210
A standard control channel exception that is thrown if there is a problem with one of t...
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
Defines the logger used by the top-level component of kea-dhcp-ddns.
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
Definition: ha_messages.h:96
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
Definition: ha_service.cc:1027
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition: ha_messages.h:75
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
Definition: ha_service.cc:949
unsigned int getPrevState() const
Fetches the model's previous state.
Definition: state_model.cc:361
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED
Definition: ha_messages.h:78
data::ConstElementPtr processMaintenanceStart()
Processes ha-maintenance-start command and returns a response.
Definition: ha_service.cc:2455
HAConfigPtr config_
Pointer to the HA hooks library configuration.
Definition: ha_service.h:1106
void startHeartbeat()
Unconditionally starts one heartbeat to a peer.
Definition: ha_service.cc:1708
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition: ha_messages.h:84
const isc::log::MessageID HA_HEARTBEAT_COMMUNICATIONS_FAILED
Definition: ha_messages.h:43
void transition(unsigned int state, unsigned int event)
Sets up the model to transition into given state with a given event.
Definition: state_model.cc:264
void startClientAndListener()
Start the client and/or listener instances.
Definition: ha_service.cc:2817
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
Definition: ha_service.h:1112
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition: ha_messages.h:42
void normalStateHandler()
Handler for the "hot-standby" and "load-balancing" states.
Definition: ha_service.cc:327
This file contains several functions and constants that are used for handling commands and responses ...
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
Definition: ha_service.cc:2288
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition: ha_config.cc:224
void localEnableDHCPService()
Enables local DHCP service.
Definition: ha_service.cc:1877
A common structure for IPv4 and IPv6 leases.
Definition: lease.h:35
virtual void defineEvents()
Defines events used by the HA service.
Definition: ha_service.cc:116
std::function< void(const bool, const std::string &)> PostRequestCallback
Callback invoked when request was sent and a response received or an error occurred.
Definition: ha_service.h:77
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition: lease.h:644
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
#define LOG_DEBUG(LOGGER, LEVEL, MESSAGE)
Macro to conveniently test debug output and log it.
Definition: macros.h:14
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
isc::log::Logger ha_logger("ha-hooks")
Definition: ha_log.h:17
const isc::log::MessageID HA_LEASE_SYNC_STALE_LEASE6_SKIP
Definition: ha_messages.h:66
void backupStateHandler()
Handler for the "backup" state.
Definition: ha_service.cc:195
int getPendingRequest(const QueryPtrType &query)
Get the number of scheduled requests for a given query.
Definition: ha_service.cc:2797
const isc::log::MessageID HA_SYNC_SUCCESSFUL
Definition: ha_messages.h:102
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition: ha_messages.h:57
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition: ha_service.h:53
A multi-threaded HTTP listener that can process API commands requests.
Role
Server's role in the High Availability setup.
Definition: ha_config.h:70
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition: ha_messages.h:83
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 lease updates.
Definition: ha_service.cc:1102
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
Definition: parking_lots.h:375
void asyncSendHAReset(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends ha-reset command to partner asynchronously.
Definition: ha_service.cc:2329
dhcp::LeasePtr pop(OpType &op_type)
Returns the next lease update and removes it from the queue.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:88
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition: ha_messages.h:30
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition: ha_messages.h:79
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition: ha_config.cc:79
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
Definition: ha_messages.h:95
void socketReadyHandler(int tcp_native_fd)
IfaceMgr external socket ready callback handler.
Definition: ha_service.cc:2770
static data::ConstElementPtr createHeartbeat(const HAServerType &server_type)
Creates ha-heartbeat command for DHCP server.
std::string ClientClass
Defines a single class name.
Definition: classify.h:37
const EventPtr & getEvent(unsigned int value)
Fetches the event referred to by value.
Definition: state_model.cc:186
unsigned int getNextEvent() const
Fetches the model's next event.
Definition: state_model.cc:373
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
Definition: ha_messages.h:104
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition: timeouts.h:38
data::ConstElementPtr processSynchronize(const std::string &server_name, const unsigned int max_period)
Processes ha-sync command and returns a response.
Definition: ha_service.cc:2125
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_DHCP_ENABLE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:41
const isc::log::MessageID HA_LEASES_SYNC_LEASE_PAGE_RECEIVED
Definition: ha_messages.h:63
boost::shared_ptr< Lease6 > Lease6Ptr
Pointer to a Lease6 structure.
Definition: lease.h:492
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition: ha_messages.h:62
static data::ConstElementPtr createMaintenanceNotify(const bool cancel, const HAServerType &server_type)
Creates ha-maintenance-notify command.
bool isModelPaused() const
Returns whether or not the model is paused.
Definition: state_model.cc:415
data::ConstElementPtr verifyAsyncResponse(const http::HttpResponsePtr &response, int &rcode)
Checks if the response is valid or contains an error.
Definition: ha_service.cc:2681
static data::ConstElementPtr createDHCPEnable(const HAServerType &server_type)
Creates dhcp-enable command for DHCP server.
const char * MessageID
Definition: message_types.h:15
bool shouldTerminate() const
Indicates if the server should transition to the terminated state as a result of high clock skew...
Definition: ha_service.cc:1054
void clientCloseHandler(int tcp_native_fd)
HttpClient close callback handler.
Definition: ha_service.cc:2779
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
Definition: ha_service.cc:858
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:760
bool sendHAReset()
Sends ha-reset command to partner synchronously.
Definition: ha_service.cc:2379
static data::ConstElementPtr createDHCPDisable(const unsigned int max_period, const HAServerType &server_type)
Creates dhcp-disable command for DHCP server.
Holds communication state between DHCPv6 servers.
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
void postNextEvent(unsigned int event)
Sets the next event to the given event value.
Definition: state_model.cc:320
int synchronize(std::string &status_message, const std::string &server_name, const unsigned int max_period)
Synchronizes lease database with a partner.
Definition: ha_service.cc:2133
bool clientHandshakeHandler(const boost::system::error_code &)
HttpClient handshake callback handler.
Definition: ha_service.h:1067
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition: ha_config.h:229
static data::ConstElementPtr createLease4GetPage(const dhcp::Lease4Ptr &lease4, const uint32_t limit)
Creates lease4-get-page command.
const isc::log::MessageID HA_TERMINATED
Definition: ha_messages.h:103
void asyncDisableDHCPService(http::HttpClient &http_client, const std::string &server_name, const unsigned int max_period, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-disable" command to the specified server.
Definition: ha_service.cc:1717
void asyncSendLeaseUpdatesFromBacklog(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends lease updates from backlog to partner asynchronously.
Definition: ha_service.cc:2212