-
Notifications
You must be signed in to change notification settings - Fork 7
/
main.py
executable file
·134 lines (123 loc) · 4.91 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import sys
import copy
# Project libs
from lib.create_pod_list import CreatPodList
from lib.log_logger import logger, level
from lib.node import Node
from lib.node_list import Nodes
from lib.cvs_loader import csv_to_json
from lib.arg_parser import parse_args
from lib.result_printer import print_results
import lib.helpers as helpers
# Config
from config import MIN_WORKERS, COMPUTE_CPU, COMPUTE_MEM, ALLOCATION_PERCENT
# CONSTANTS/FLAGS
# Allocation modes understood by run_allocations(mode=...):
#   SCHEDULING      - normal allocation
#   FAULTSIMULATION - simulate one node failure and rebalance its pods
SCHEDULING, FAULTSIMULATION = 1, 2
def run_allocations(pods, mode=SCHEDULING, fault_simulation=None, excluded_node=None):
    """
    Pod allocation

    Places every pod from ``pods`` on a node of the module-level ``node_list``.
    When a pod can not be placed, an error is logged, the summary is printed
    and the process exits with status 255.

    :param pods: pods to be allocated
    :param mode: Mode for allocation: SCHEDULING - normal allocation; FAULTSIMULATION - simulates one node failure and rebalances its pods on other nodes
    :param fault_simulation: node_list for simulation (not used by this function; kept for backward compatibility)
    :param excluded_node: Exclude failed node from nodes for scheduling
    :return: no
    """
    # Allocate nodes
    for _pod in pods:
        if mode == SCHEDULING:
            _node = node_list.find_node(_pod)
            if not _node:
                logger.warning("Can not find schedulable node. Adding new one")
                _node = Node(
                    name=len(node_list),
                    mem_total=COMPUTE_MEM,
                    cpu_total=COMPUTE_CPU,
                    allocation=ALLOCATION_PERCENT,
                )
                node_list.add_node(_node)

                # A pod that does not fit on a freshly created node can not be
                # scheduled at all. CPU is checked first, then memory.
                failure = None
                if _node.cpu_available < _pod["cpu"]:
                    failure = (
                        f"FAILED: Can not allocate pod {_pod['app']}, on node {_node.name} "
                        f"as pod CPU requirements higher than vCPU on server "
                        f"Node CPU>: {_node.cpu_available} < {_pod['cpu']} "
                    )
                elif _node.mem_available < _pod["mem"]:
                    failure = (
                        f"FAILED: Can not allocate pod {_pod['app']}, on node {_node.name} "
                        f"as pod Memory requirements higher than physical memory. "
                        f"Mem: {_node.mem_available} < {_pod['mem']}"
                    )
                if failure is not None:
                    logger.error(failure)
                    # Print the summary before exiting, same as the
                    # fault-simulation failure path below.
                    print_results(args, node_list, summary_only=True)
                    sys.exit(255)
            _node.add_pod(_pod)
        elif mode == FAULTSIMULATION:
            _node = node_list.find_node(_pod, exclude_node=excluded_node)
            if not _node:
                # excluded_node defaults to None; fall back to the raw value so
                # the real failure is reported instead of an AttributeError.
                failed_name = getattr(excluded_node, "name", excluded_node)
                logger.error(
                    f"FAILED: Can not evict {_pod.get('app')} from failed node {failed_name}\n"
                    f"Reconsider ALLOCATION_PERCENT values, it's {ALLOCATION_PERCENT}% now\n"
                    f"Allocated nodes: {len(node_list.node_list)}"
                )
                print_results(args, node_list, summary_only=True)
                sys.exit(255)
            _node.add_pod(_pod)
def run_simulation():
    """
    Simulation of failed node

    For each node in the module-level ``node_list``: remove it from the list,
    re-allocate its pods through ``run_allocations`` in FAULTSIMULATION mode,
    print the summary, then restore ``node_list`` to its state from before
    that node was removed.
    ATM only one node is supported
    :return: none
    """
    global node_list
    print("Simulating node failure. Anti-Affinity violations will be ignored")
    # Iterate over a snapshot: node_list is mutated and rebound inside the loop.
    for failed_node in copy.deepcopy(node_list).node_list:
        # State to restore once the simulation for this node is finished.
        pristine_nodes = copy.deepcopy(node_list)
        logger.info(f"Running Simulation for {failed_node.name}")
        # Remove the first live node carrying the failed node's name.
        for index, candidate in enumerate(node_list.node_list):
            if candidate.name == failed_node.name:
                del node_list.node_list[index]
                break
        # Read the pods before reset_allocation() runs, matching the original
        # left-to-right argument evaluation order.
        evicted_pods = failed_node.pods
        # NOTE(review): the return value of reset_allocation() is what gets
        # passed as excluded_node; presumably it returns the node itself.
        # Confirm against lib.node.Node.
        excluded = failed_node.reset_allocation()
        run_allocations(
            evicted_pods,
            mode=FAULTSIMULATION,
            excluded_node=excluded,
        )
        print(f"Result of simulation for failed node {failed_node.name}")
        print_results(args, node_list, summary_only=True)
        # pristine_nodes is already a private deep copy that nothing else
        # references, so it can be rebound directly without copying again.
        node_list = pristine_nodes
if __name__ == "__main__":
    # Parse the command line and start with an empty node collection
    args = parse_args()
    node_list = Nodes()

    # Load the apps, highest "affinity" value first, and build the pod list
    apps = csv_to_json(args.filename)
    apps = sorted(apps, key=lambda app: app["affinity"], reverse=True)
    pods_list = CreatPodList.add_pods(apps)
    logger.info(
        f"Starting allocation, there are {len(apps)} apps to be allocated. Log level {level}"
    )

    # Create the minimum worker pool; each node is named by the current list length
    for _ in range(MIN_WORKERS):
        worker = Node(
            name=len(node_list),
            mem_total=COMPUTE_MEM,
            cpu_total=COMPUTE_CPU,
            allocation=ALLOCATION_PERCENT,
        )
        node_list.add_node(node=worker)

    # Run allocations and print the results
    run_allocations(pods_list)
    print_results(args, node_list)

    # Optionally run the fault simulation
    if helpers.do_faultsim():
        run_simulation()