|
| 1 | +/* |
| 2 | +Copyright 2024 The Machine Controller Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package anexia |
| 18 | + |
| 19 | +import ( |
| 20 | + "context" |
| 21 | + "sync" |
| 22 | + "time" |
| 23 | + |
| 24 | + "github.com/kubermatic/machine-controller/pkg/apis/cluster/common" |
| 25 | + anxtypes "github.com/kubermatic/machine-controller/pkg/cloudprovider/provider/anexia/types" |
| 26 | + anxclient "go.anx.io/go-anxcloud/pkg/client" |
| 27 | + anxaddr "go.anx.io/go-anxcloud/pkg/ipam/address" |
| 28 | + anxvm "go.anx.io/go-anxcloud/pkg/vsphere/provisioning/vm" |
| 29 | + "go.uber.org/zap" |
| 30 | +) |
| 31 | + |
| 32 | +func networkInterfacesForProvisioning(ctx context.Context, log *zap.SugaredLogger, client anxclient.Client) ([]anxvm.Network, error) { |
| 33 | + reconcileContext := getReconcileContext(ctx) |
| 34 | + |
| 35 | + config := reconcileContext.Config |
| 36 | + status := reconcileContext.Status |
| 37 | + |
| 38 | + // make sure we have the status.Networks array allocated to fill it with |
| 39 | + // data, warning if we already have something but not matching the |
| 40 | + // configuration. |
| 41 | + if len(status.Networks) != len(config.Networks) { |
| 42 | + if len(status.Networks) != 0 { |
| 43 | + log.Warn("size of status.Networks != config.Networks, this should not happen in normal operation - ignoring existing status") |
| 44 | + } |
| 45 | + |
| 46 | + status.Networks = make([]anxtypes.NetworkStatus, len(config.Networks)) |
| 47 | + } |
| 48 | + |
| 49 | + ret := make([]anxvm.Network, len(config.Networks)) |
| 50 | + for netIndex, network := range config.Networks { |
| 51 | + networkStatus := &status.Networks[netIndex] |
| 52 | + addresses := make([]string, len(network.Prefixes)) |
| 53 | + |
| 54 | + for prefixIndex, prefix := range network.Prefixes { |
| 55 | + // make sure we have the address status array allocated to fill it |
| 56 | + // with our IP reserve status, warning if we already have something |
| 57 | + // there but not matching the configuration. |
| 58 | + if len(networkStatus.Addresses) != len(network.Prefixes) { |
| 59 | + if len(networkStatus.Addresses) != 0 { |
| 60 | + log.Warnf("size of status.Networks[%[1]v].Addresses != config.Networks[%[1]v].Prefixes, this should not happen in normal operation - ignoring existing status", netIndex) |
| 61 | + } |
| 62 | + |
| 63 | + networkStatus.Addresses = make([]anxtypes.NetworkAddressStatus, len(network.Prefixes)) |
| 64 | + } |
| 65 | + |
| 66 | + reservedIP, err := getIPAddress(ctx, log, &network, prefix, &networkStatus.Addresses[prefixIndex], client) |
| 67 | + if err != nil { |
| 68 | + return nil, newError(common.CreateMachineError, "failed to reserve IP: %v", err) |
| 69 | + } |
| 70 | + |
| 71 | + addresses[prefixIndex] = reservedIP |
| 72 | + } |
| 73 | + |
| 74 | + ret[netIndex] = anxvm.Network{ |
| 75 | + VLAN: network.VlanID, |
| 76 | + IPs: addresses, |
| 77 | + |
| 78 | + // the one NIC type supported by the ADC API |
| 79 | + NICType: anxtypes.VmxNet3NIC, |
| 80 | + } |
| 81 | + } |
| 82 | + |
| 83 | + return ret, nil |
| 84 | +} |
| 85 | + |
// _engsup3404mutex serializes IP reservations made by getIPAddress.
//
// ENGSUP-3404 describes a race condition when reserving IPs: two calls for
// one IP each, arriving at "nearly the same millisecond", can both end up
// reserving the same IP.
//
// The proposed fix was to reserve n IPs in a single call, but that would
// require large architectural changes - we can't really "reserve IPs for all
// the Machines we want to create and then create the Machines" here.
//
// This mutex alleviates the issue well enough that we have not seen it for a
// long time. It is not impossible that the race condition was fixed by some
// other change without us being told, but we would rather not test that and
// risk running into problems again; the mutex is not too expensive.
var _engsup3404mutex sync.Mutex
| 99 | + |
| 100 | +func getIPAddress(ctx context.Context, log *zap.SugaredLogger, network *resolvedNetwork, prefix string, status *anxtypes.NetworkAddressStatus, client anxclient.Client) (string, error) { |
| 101 | + reconcileContext := getReconcileContext(ctx) |
| 102 | + |
| 103 | + // only use IP if it is still unbound |
| 104 | + if status.ReservedIP != "" && status.IPState == anxtypes.IPStateUnbound && (!status.IPProvisioningExpires.IsZero() && status.IPProvisioningExpires.After(time.Now())) { |
| 105 | + log.Infow("Re-using already provisioned IP", "ip", status.ReservedIP) |
| 106 | + return status.ReservedIP, nil |
| 107 | + } |
| 108 | + |
| 109 | + _engsup3404mutex.Lock() |
| 110 | + defer _engsup3404mutex.Unlock() |
| 111 | + |
| 112 | + log.Info("Creating a new IP for machine") |
| 113 | + addrAPI := anxaddr.NewAPI(client) |
| 114 | + config := reconcileContext.Config |
| 115 | + |
| 116 | + res, err := addrAPI.ReserveRandom(ctx, anxaddr.ReserveRandom{ |
| 117 | + LocationID: config.LocationID, |
| 118 | + VlanID: network.VlanID, |
| 119 | + PrefixID: prefix, |
| 120 | + ReservationPeriod: uint(anxtypes.IPProvisioningExpires / time.Second), |
| 121 | + Count: 1, |
| 122 | + }) |
| 123 | + if err != nil { |
| 124 | + return "", newError(common.InvalidConfigurationMachineError, "failed to reserve an ip address: %v", err) |
| 125 | + } |
| 126 | + |
| 127 | + if len(res.Data) < 1 { |
| 128 | + return "", newError(common.InsufficientResourcesMachineError, "no ip address is available for this machine") |
| 129 | + } |
| 130 | + |
| 131 | + ip := res.Data[0].Address |
| 132 | + status.ReservedIP = ip |
| 133 | + status.IPState = anxtypes.IPStateUnbound |
| 134 | + status.IPProvisioningExpires = time.Now().Add(anxtypes.IPProvisioningExpires) |
| 135 | + |
| 136 | + return ip, nil |
| 137 | +} |
| 138 | + |
| 139 | +func networkReservedAddresses(status *anxtypes.ProviderStatus) []string { |
| 140 | + ret := make([]string, 0) |
| 141 | + for _, network := range status.Networks { |
| 142 | + for _, address := range network.Addresses { |
| 143 | + if address.ReservedIP != "" && address.IPState == anxtypes.IPStateBound { |
| 144 | + ret = append(ret, address.ReservedIP) |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + return ret |
| 150 | +} |
| 151 | + |
| 152 | +func networkStatusMarkIPsBound(status *anxtypes.ProviderStatus) { |
| 153 | + for network := range status.Networks { |
| 154 | + for addr := range status.Networks[network].Addresses { |
| 155 | + status.Networks[network].Addresses[addr].IPState = anxtypes.IPStateBound |
| 156 | + } |
| 157 | + } |
| 158 | +} |
0 commit comments