@@ -39,9 +39,15 @@ const (
// empty is a zero-size struct{} value; elsewhere in this file it is stored as the
// value of set-like maps (e.g. nodeDb.indexedNodeLabelValues[key][value] = empty).
3939var empty struct {}
4040
4141type Node struct {
42- Id string
43- Name string
42+ // Unique id and index of this node.
43+ // TODO(albin): Having both id and index is redundant.
44+ // Currently, the id is "cluster name" + "node name" and index an integer assigned on node creation.
45+ Id string
46+ Index uint64
47+
48+ // Executor this node belongs to and node name, which must be unique per executor.
4449 Executor string
50+ Name string
4551
4652 // We need to store taints and labels separately from the node type: the latter only includes
4753 // indexed taints and labels, but we need all of them when checking pod requirements.
@@ -65,9 +71,11 @@ type Node struct {
6571// shallow copies of fields that are not mutated by methods of NodeDb.
6672func (node * Node ) UnsafeCopy () * Node {
6773 return & Node {
68- Id : node .Id ,
69- Name : node .Name ,
74+ Id : node .Id ,
75+ Index : node .Index ,
76+
7077 Executor : node .Executor ,
78+ Name : node .Name ,
7179
7280 Taints : node .Taints ,
7381 Labels : node .Labels ,
@@ -139,16 +147,19 @@ func (nodeDb *NodeDb) create(node *schedulerobjects.Node) (*Node, error) {
139147 nodeDb.indexedNodeLabelValues [key ][value ] = empty
140148 }
141149 }
150+ index := uint64 (nodeDb .numNodes )
142151 nodeDb .numNodes ++
143152 nodeDb .numNodesByNodeType [nodeType .Id ]++
144153 nodeDb .totalResources .Add (totalResources )
145154 nodeDb .nodeTypes [nodeType .Id ] = nodeType
146155 nodeDb .mu .Unlock ()
147156
148157 entry := & Node {
149- Id : node .Id ,
150- Name : node .Name ,
158+ Id : node .Id ,
159+ Index : index ,
160+
151161 Executor : node .Executor ,
162+ Name : node .Name ,
152163
153164 Taints : taints ,
154165 Labels : labels ,
@@ -256,8 +267,10 @@ type NodeDb struct {
256267 //
257268 // Lower resolution makes scheduling faster, but may lead to jobs incorrectly being considered unschedulable.
258269 indexedResourceResolutionMillis []int64
259- // Map from priority class priority to the index tracking allocatable resources at that priority.
270+ // Map from priority class priority to the database index tracking allocatable resources at that priority.
260271 indexNameByPriority map [int32 ]string
272+ // Map from priority class priority to the index of node.keys corresponding to that priority.
273+ keyIndexByPriority map [int32 ]int
261274 // Taint keys that to create indexes for.
262275 // Should include taints frequently used for scheduling.
263276 // Since the NodeDb can efficiently sort out nodes with taints not tolerated
@@ -317,7 +330,7 @@ func NewNodeDb(
317330 nodeDbPriorities = append (nodeDbPriorities , types .AllowedPriorities (priorityClasses )... )
318331
319332 indexedResourceNames := util .Map (indexedResources , func (v configuration.IndexedResource ) string { return v .Name })
320- schema , indexNameByPriority := nodeDbSchema (nodeDbPriorities , indexedResourceNames )
333+ schema , indexNameByPriority , keyIndexByPriority := nodeDbSchema (nodeDbPriorities , indexedResourceNames )
321334 db , err := memdb .NewMemDB (schema )
322335 if err != nil {
323336 return nil , errors .WithStack (err )
@@ -359,6 +372,7 @@ func NewNodeDb(
359372 func (v configuration.IndexedResource ) int64 { return v .Resolution .MilliValue () },
360373 ),
361374 indexNameByPriority : indexNameByPriority ,
375+ keyIndexByPriority : keyIndexByPriority ,
362376 indexedTaints : mapFromSlice (indexedTaints ),
363377 indexedNodeLabels : mapFromSlice (indexedNodeLabels ),
364378 indexedNodeLabelValues : indexedNodeLabelValues ,
@@ -432,7 +446,7 @@ func (nodeDb *NodeDb) IndexedNodeLabelValues(label string) (map[string]struct{},
// NumNodes returns the value of nodeDb.numNodes as an int.
// nodeDb.mu is locked for the duration of the read and released via defer.
432446func (nodeDb * NodeDb ) NumNodes () int {
433447 nodeDb .mu .Lock ()
434448 defer nodeDb .mu .Unlock ()
435- return nodeDb .numNodes
 // The added line converts explicitly to int.
 // NOTE(review): presumably numNodes is no longer declared as int — confirm at its declaration.
449+ return int ( nodeDb .numNodes )
436450}
437451
438452func (nodeDb * NodeDb ) TotalResources () schedulerobjects.ResourceList {
@@ -791,11 +805,16 @@ func (nodeDb *NodeDb) selectNodeForPodAtPriority(
791805 if ! ok {
792806 return nil , errors .Errorf ("no index for priority %d; must be in %v" , priority , nodeDb .indexNameByPriority )
793807 }
808+ keyIndex , ok := nodeDb .keyIndexByPriority [priority ]
809+ if ! ok {
810+ return nil , errors .Errorf ("no key index for priority %d; must be in %v" , priority , nodeDb .keyIndexByPriority )
811+ }
794812 it , err := NewNodeTypesIterator (
795813 txn ,
796814 matchingNodeTypeIds ,
797815 indexName ,
798816 priority ,
817+ keyIndex ,
799818 nodeDb .indexedResources ,
800819 indexResourceRequests ,
801820 nodeDb .indexedResourceResolutionMillis ,
@@ -1158,7 +1177,7 @@ func (nodeDb *NodeDb) Upsert(node *Node) error {
11581177func (nodeDb * NodeDb ) UpsertWithTxn (txn * memdb.Txn , node * Node ) error {
11591178 keys := make ([][]byte , len (nodeDb .nodeDbPriorities ))
11601179 for i , p := range nodeDb .nodeDbPriorities {
1161- keys [i ] = nodeDb .nodeDbKey (keys [i ], node .NodeTypeId , node .AllocatableByPriority [p ])
1180+ keys [i ] = nodeDb .nodeDbKey (keys [i ], node .NodeTypeId , node .AllocatableByPriority [p ], node . Index )
11621181 }
11631182 node .Keys = keys
11641183
@@ -1204,38 +1223,40 @@ func (nodeDb *NodeDb) AddEvictedJobSchedulingContextWithTxn(txn *memdb.Txn, inde
12041223 return nil
12051224}
12061225
// nodeDbSchema assembles the memdb schema used by the NodeDb.
//
// The schema contains two tables, keyed by their own Name fields: the table returned by
// nodesTableSchema(priorities, resources) and the table returned by evictionsTableSchema().
//
// Returns (added-line version):
//   - the *memdb.DBSchema holding both tables,
//   - the priority -> index name map produced by nodesTableSchema,
//   - the priority -> key index map produced by nodesTableSchema.
//
// Both maps are passed through unchanged from nodesTableSchema.
1207- func nodeDbSchema (priorities []int32 , resources []string ) (* memdb.DBSchema , map [int32 ]string ) {
1208- nodesTable , indexNameByPriority := nodesTableSchema (priorities , resources )
1226+ func nodeDbSchema (priorities []int32 , resources []string ) (* memdb.DBSchema , map [int32 ]string , map [ int32 ] int ) {
1227+ nodesTable , indexNameByPriority , keyIndexByPriority := nodesTableSchema (priorities , resources )
12091228 evictionsTable := evictionsTableSchema ()
12101229 return & memdb.DBSchema {
12111230 Tables : map [string ]* memdb.TableSchema {
12121231 nodesTable .Name : nodesTable ,
12131232 evictionsTable .Name : evictionsTable ,
12141233 },
1215- }, indexNameByPriority
1234+ }, indexNameByPriority , keyIndexByPriority
12161235}
12171236
// nodesTableSchema builds the memdb table schema for the "nodes" table.
//
// The table always has a unique "id" index over the string field "Id". In addition, one
// index is registered per entry of priorities: the i-th priority gets an index named
// nodeIndexName(i) whose indexer is &NodeIndex{KeyIndex: i}.
//
// Returns (added-line version):
//   - the table schema,
//   - a map from priority to the name of the index created for it,
//   - a map from priority to i, its position in priorities (the KeyIndex given to NodeIndex).
//
// If priorities contains duplicates, later entries overwrite earlier ones in both maps.
// The resources parameter is not referenced anywhere in this function body.
1218- func nodesTableSchema (priorities []int32 , resources []string ) (* memdb.TableSchema , map [int32 ]string ) {
1237+ func nodesTableSchema (priorities []int32 , resources []string ) (* memdb.TableSchema , map [int32 ]string , map [ int32 ] int ) {
 // Capacity hint: one index per priority plus the "id" index.
12191238 indexes := make (map [string ]* memdb.IndexSchema , len (priorities )+ 1 )
12201239 indexes ["id" ] = & memdb.IndexSchema {
12211240 Name : "id" ,
12221241 Unique : true ,
12231242 Indexer : & memdb.StringFieldIndex {Field : "Id" },
12241243 }
12251244 indexNameByPriority := make (map [int32 ]string , len (priorities ))
1245+ keyIndexByPriority := make (map [int32 ]int , len (priorities ))
12261246 for i , priority := range priorities {
12271247 name := nodeIndexName (i )
12281248 indexNameByPriority [priority ] = name
1249+ keyIndexByPriority [priority ] = i
12291250 indexes [name ] = & memdb.IndexSchema {
12301251 Name : name ,
1231- Unique : false ,
 // NOTE(review): the per-priority index is now declared unique. Presumably this relies on
 // the index key embedding node.Index (see the nodeDbKey call in UpsertWithTxn) — confirm
 // that RoundedNodeIndexKeyFromResourceList actually encodes it into the key.
1252+ Unique : true ,
12321253 Indexer : & NodeIndex {KeyIndex : i },
12331254 }
12341255 }
12351256 return & memdb.TableSchema {
12361257 Name : "nodes" ,
12371258 Indexes : indexes ,
1238- }, indexNameByPriority
1259+ }, indexNameByPriority , keyIndexByPriority
12391260}
12401261
12411262func evictionsTableSchema () * memdb.TableSchema {
@@ -1278,12 +1299,13 @@ func (nodeDb *NodeDb) stringFromPodRequirementsNotMetReason(reason PodRequiremen
12781299// nodeDbKey returns the index key for a particular node.
12791300// Allocatable resources are rounded down to the closest multiple of nodeDb.indexedResourceResolutionMillis.
12801301// This improves efficiency by reducing the number of distinct values in the index.
//
// Parameters:
//   - out: byte slice forwarded as the first argument to RoundedNodeIndexKeyFromResourceList
//     (presumably a buffer to reuse for the key — confirm against that helper).
//   - nodeTypeId: id of the node's type, forwarded unchanged.
//   - allocatable: resource list the key is derived from.
//   - nodeIndex (added-line version only): forwarded unchanged as the final argument;
//     UpsertWithTxn passes node.Index here.
//
// The indexed resource names and resolutions are taken from nodeDb.indexedResources and
// nodeDb.indexedResourceResolutionMillis. The return value is whatever
// RoundedNodeIndexKeyFromResourceList returns.
1281- func (nodeDb * NodeDb ) nodeDbKey (out []byte , nodeTypeId uint64 , allocatable schedulerobjects.ResourceList ) []byte {
1302+ func (nodeDb * NodeDb ) nodeDbKey (out []byte , nodeTypeId uint64 , allocatable schedulerobjects.ResourceList , nodeIndex uint64 ) []byte {
12821303 return RoundedNodeIndexKeyFromResourceList (
12831304 out ,
12841305 nodeTypeId ,
12851306 nodeDb .indexedResources ,
12861307 nodeDb .indexedResourceResolutionMillis ,
12871308 allocatable ,
1309+ nodeIndex ,
12881310 )
12891311}
0 commit comments