asm {
//************************************
//See ::/Doc/Credits.DD.
_MALLOC::
//U8 *MAlloc(I64 size, CTask *mem_task=NULL)
//SF_ARG1 = size in bytes, SF_ARG2 = CTask or CHeapCtrl (NULL = current task).
//Returns in RAX the address of the usable area (CMemUsed.start of the chunk).
// Throws 'OutMem'
				PUSH		RBP
				MOV			RBP, RSP
				PUSH		RSI
				PUSH		RDI

//Resolve the heap ctrl into RDX: NULL -> current task, CTask -> its data_heap.
				XOR			RBX, RBX
				MOV			RDX, U64 SF_ARG2[RBP]
				TEST		RDX, RDX
				JNZ			@@05
				MOV			RDX, U64 FS:CTask.addr[RBX]
@@05:			CMP			U32 CTask.task_signature[RDX], TASK_SIGNATURE_VAL

#assert CTask.task_signature == CHeapCtrl.hc_signature //location signature same

				JNE			@@10
				MOV			RDX, U64 CTask.data_heap[RDX]
@@10:			CMP			U32 CHeapCtrl.hc_signature[RDX], HEAP_CTRL_SIGNATURE_VAL
				JE			@@15
				PUSH		RDX
				CALL		&SysBadMAlloc
				JMP			I32 _SYS_HLT

//RAX = requested size + header, rounded up to a multiple of 8,
//but never smaller than CMemUsed.start.
@@15:			MOV			RAX, U64 SF_ARG1[RBP]
				PUSHFD							//saved flags are restored by POPFD after unlocking
				ADD			RAX, CMemUsed.start + 7		//round-up to I64
				AND			AL, 0xF8
#assert CMemUsed.start >= sizeof(CMemUnused)
				CMP			RAX, CMemUsed.start
				JAE			@@20
				MOV			RAX, CMemUsed.start
@@20:

//Interrupts off, then spin until the heap ctrl lock bit is ours.
				CLI
@@25:			LOCK
				BTS			U32 CHeapCtrl.locked_flags[RDX], HClf_LOCKED
				PAUSE	//don't know if this instruction helps
				JC			@@25

//Sizes below MEM_HEAP_HASH_SIZE: try the exact-size free list in heap_hash first.
				CMP			RAX, MEM_HEAP_HASH_SIZE
				JAE			@@30
				MOV			RSI, U64 CHeapCtrl.heap_hash[RAX + RDX]
				TEST		RSI, RSI
				JZ			@@35
				MOV			RCX, U64 CMemUnused.next[RSI]
				MOV			U64 CHeapCtrl.heap_hash[RAX + RDX], RCX
				JMP			I32 MALLOC_ALMOST_DONE

//Big allocation
@@30:			ADD			RAX, sizeof(CMemBlk) + MEM_PAG_SIZE - 1
				SHR			RAX, MEM_PAG_BITS

				PUSH		RDX //preserve HeapCtrl
				PUSH		RDX
				PUSH		RAX
				CALL		&MemPagTaskAlloc
				POP			RDX
				TEST		RAX, RAX
				JZ			@@45		//Out of memory
				MOV			RSI, RAX
				MOV			EAX, U32 CMemBlk.pags[RSI]

				SHL			RAX, MEM_PAG_BITS
				SUB			RAX, sizeof(CMemBlk)
				ADD			RSI, sizeof(CMemBlk)
				JMP			I32 MALLOC_ALMOST_DONE

//Little allocation, chunk-off piece from free list chunks
@@35:			LEA			RSI, U64 CHeapCtrl.malloc_free_list - CMemUnused.next[RDX]

//Walk malloc_free_list; RBX = previous entry, RSI = current entry.
@@40:			MOV			RBX, RSI
				MOV			RSI, U64 CMemUnused.next[RBX]
				TEST		RSI, RSI
				JNZ			I32 @@60
//End of list reached: get more pages and add them to the free list.
				PUSH		RAX 			//-**** save byte size
				ADD			RAX, 16 * MEM_PAG_SIZE - 1
				SHR			RAX, MEM_PAG_BITS

				PUSH		RDX //preserve HeapCtrl
				PUSH		RDX
				PUSH		RAX
				CALL		&MemPagTaskAlloc
				POP			RDX
				TEST		RAX, RAX
				JNZ			@@50
//Drop the saved byte size (-****) so that the POPFD below restores the
//flags saved by PUSHFD instead of loading the byte size into the flags.
				ADD			RSP, 8

//Out of memory
@@45:			LOCK
				BTR			U32 CHeapCtrl.locked_flags[RDX], HClf_LOCKED
				POPFD
				PUSH		TRUE
				MOV			RAX, 'OutMem'
				PUSH		RAX
				CALL		I32 &throw
				JMP			I32 MALLOC_FINAL_EXIT //Never gets here, hopefully.

//RSI = new CMemBlk, RAX = its size in bytes.
@@50:			MOV			RSI, RAX
				MOV			EAX, U32 CMemBlk.pags[RSI]
				SHL			RAX, MEM_PAG_BITS

//Can it be combined with last chunk? (Never Free these chunks.)
				MOV			RDI, U64 CHeapCtrl.last_mergable[RDX]
				LEA			RBX, U64 [RSI + RAX]
				CMP			RDI, RBX
				JNE			@@55

				PUSH		RAX
				MOV			EAX, U32 CMemBlk.pags[RDI]
				ADD			U32 CMemBlk.pags[RSI],EAX
//QueueRemove
				MOV			RAX, U64 CMemBlk.next[RDI]
				MOV			RBX, U64 CMemBlk.last[RDI]
				MOV			U64 CMemBlk.last[RAX], RBX
				MOV			U64 CMemBlk.next[RBX], RAX
				POP			RAX

//Link the area after the CMemBlk header into the front of malloc_free_list.
@@55:			MOV			U64 CHeapCtrl.last_mergable[RDX], RSI
				LEA			RSI, U64 sizeof(CMemBlk)[RSI]
				SUB			RAX, sizeof(CMemBlk)
				LEA			RBX, U64 CHeapCtrl.malloc_free_list - CMemUnused.next[RDX]
				MOV			RDI, U64 CMemUnused.next[RBX]
				MOV			U64 CMemUnused.next[RSI], RDI
				MOV			U64 CMemUnused.size[RSI], RAX
				MOV			U64 CMemUnused.next[RBX], RSI
				POP			RAX 			//+****
				JMP			@@70
//Entry too small: keep walking. Exact fit: unlink it. Larger: split it.
@@60:			CMP			U64 CMemUnused.size[RSI], RAX
				JB			I32 @@40
				JNE			@@70

@@65:			MOV			RDI, U64 CMemUnused.next[RSI]
				MOV			U64 CMemUnused.next[RBX], RDI
				JMP			MALLOC_ALMOST_DONE

@@70:			SUB			U64 CMemUnused.size[RSI], RAX	//UPDATE FREE ENTRY
				CMP			U64 CMemUnused.size[RSI], sizeof(CMemUnused)
				JAE			@@75		//take from top of block
				ADD			U64 CMemUnused.size[RSI], RAX	//doesn't fit, undo
				JMP			I32 @@40

@@75:			ADD			RSI, U64 CMemUnused.size[RSI]

MALLOC_ALMOST_DONE:
//RSI = res - CMemUsed.size
//RAX = size + CMemUsed.size
//RDX = HeapCtrl
				ADD			U64 CHeapCtrl.used_u8s[RDX], RAX

#if _CONFIG_HEAP_DEBUG
//QueueInsert
				MOV			RDI, U64 CHeapCtrl.last_um[RDX]
				MOV			U64 CMemUsed.next[RDI], RSI
				MOV			U64 CHeapCtrl.last_um[RDX], RSI
				MOV			U64 CMemUsed.last[RSI], RDI
				LEA			RDI, U64 CHeapCtrl.next_um - CMemUsed.next[RDX]
				MOV			U64 CMemUsed.next[RSI], RDI

//Caller1/Caller2
//Return addresses at or above [MEM_HEAP_LIMIT] are recorded as 0.
				PUSH		RDX
				MOV			RDX, U64 [MEM_HEAP_LIMIT]
				MOV			RDI, U64 SF_RIP[RBP]
				CMP			RDI, RDX
				JB			@@80
				XOR			RDI, RDI
				MOV			U64 CMemUsed.caller1[RSI], RDI
				JMP			@@90
@@80:			MOV			U64 CMemUsed.caller1[RSI], RDI
				MOV			RDI, U64 SF_RBP[RBP]
				CMP			RDI, RDX
				JB			@@85
				XOR			RDI, RDI
				JMP			@@90
@@85:			MOV			RDI, U64 SF_RIP[RDI]
				CMP			RDI, RDX
				JB			@@90
				XOR			RDI, RDI
@@90:			MOV			U64 CMemUsed.caller2[RSI], RDI
				POP			RDX

#endif
//Unlock the heap ctrl and restore the caller's flags.
				LOCK
				BTR			U32 CHeapCtrl.locked_flags[RDX], HClf_LOCKED
				POPFD

				MOV			U64 CMemUsed.size[RSI], RAX
				MOV			U64 CMemUsed.hc[RSI], RDX
				LEA			RAX, U64 CMemUsed.start[RSI]

//Optional heap-log hook: called with the result address when the semaphore is set
//and the extern table entry is non-NULL.
				TEST		U8 [SYS_SEMAS + SEMA_HEAPLOG_ACTIVE * DEFAULT_CACHE_LINE_WIDTH], 1
				JZ			@@105
				PUSH		RAX
				PUSH		RAX
				MOV			RAX, U64 [SYS_EXTERN_TABLE]
				MOV			RAX, U64 EXT_HEAPLOG_MALLOC*8[RAX]
				TEST		RAX, RAX
				JZ			@@95
				CALL		RAX
				JMP			@@100
@@95:			ADD			RSP, 8
@@100:			POP			RAX

//Optional fill of the usable area with the byte at [SYS_HEAP_INIT_VAL].
@@105:			TEST		U8 [SYS_HEAP_INIT_FLAG], 1
				JZ			MALLOC_FINAL_EXIT
				PUSH		RAX
				MOV			RCX, U64 CMemUsed.size - CMemUsed.start[RAX]
				SUB			RCX, CMemUsed.start
				MOV			RDI, RAX
				MOV			AL, U8 [SYS_HEAP_INIT_VAL]
				REP_STOSB
				POP			RAX

MALLOC_FINAL_EXIT:
				POP			RDI
				POP			RSI
				POP			RBP
				RET1		16

//************************************
_FREE::
//U0 Free(U8 *addr)
//SF_ARG1 = address returned by MAlloc (or an aligned address whose
//preceding I64 is a negative offset back to the real chunk). NULL is ignored.
//Be aware of heap_hash in MemPagTaskAlloc().
				PUSH		RBP
				MOV			RBP, RSP
				PUSH		RSI
				PUSH		RDI

//Optional heap-log hook, called with the un-aligned chunk address.
				TEST		U8 [SYS_SEMAS + SEMA_HEAPLOG_ACTIVE * DEFAULT_CACHE_LINE_WIDTH], 1
				JZ			@@15
				MOV			RBX, U64 SF_ARG1[RBP]
				TEST		RBX, RBX
				JZ			@@05
				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
				TEST		RAX, RAX
				JGE			@@05		//Aligned alloced chunks have neg size
				ADD			RBX, RAX
@@05:			PUSH		RBX
				MOV			RAX, U64 [SYS_EXTERN_TABLE]
				MOV			RAX, U64 EXT_HEAPLOG_FREE*8[RAX]
				TEST		RAX, RAX
				JZ			@@10
				CALL		RAX
				JMP			@@15
@@10:			ADD			RSP, 8

@@15:			MOV			RSI, U64 SF_ARG1[RBP]
				TEST		RSI, RSI

#if _CONFIG_HEAP_DEBUG
				JZ			I32 FREE_DONE
#else
				JZ			FREE_DONE
#endif

				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RSI]
				TEST		RAX, RAX
				JGE			@@20		//Aligned alloced chunks have neg size.
//The neg size is offset to start of CMemUsed struct.
				ADD			RSI, RAX

//RSI = CMemUsed header, RDX = owning heap ctrl (validated by signature).
@@20:			PUSHFD
				SUB			RSI, CMemUsed.start
				MOV			RDX, U64 CMemUsed.hc[RSI]
				CMP			U32 CHeapCtrl.hc_signature[RDX], HEAP_CTRL_SIGNATURE_VAL
				JE			@@25
				ADD			RSI, CMemUsed.start
				PUSH		RSI
				CALL		&SysBadFree
				JMP			I32 _SYS_HLT

@@25:			MOV			RAX, U64 CMemUsed.size[RSI]
				CLI
@@30:			LOCK
				BTS			U32 CHeapCtrl.locked_flags[RDX], HClf_LOCKED
				PAUSE
				JC			@@30
//Adjust used_u8s only while holding the heap lock, matching _MALLOC,
//which updates the same counter under the lock.
				SUB			U64 CHeapCtrl.used_u8s[RDX], RAX
#if _CONFIG_HEAP_DEBUG
//QueueRemove
				MOV			RDX, U64 CMemUsed.next[RSI]
				MOV			RDI, U64 CMemUsed.last[RSI]
				MOV			U64 CMemUsed.last[RDX], RDI
				MOV			U64 CMemUsed.next[RDI], RDX

//Caller1/Caller2
//Return addresses at or above [MEM_HEAP_LIMIT] are recorded as 0.
				MOV			RDX, U64 [MEM_HEAP_LIMIT]
				MOV			RDI, U64 SF_RIP[RBP]
				CMP			RDI, RDX
				JB			@@35
				XOR			RDI, RDI
				MOV			U64 CMemUnused.caller1[RSI], RDI
				JMP			@@45
@@35:			MOV			U64 CMemUnused.caller1[RSI], RDI
				MOV			RDI, U64 SF_RBP[RBP]
				CMP			RDI, RDX
				JB			@@40
				XOR			RDI, RDI
				JMP			@@45
@@40:			MOV			RDI, U64 SF_RIP[RDI]
				CMP			RDI, RDX
				JB			@@45
				XOR			RDI, RDI
@@45:			MOV			U64 CMemUnused.caller2[RSI], RDI

				MOV			RDX, U64 CMemUsed.hc[RSI]	//RDX was used as scratch above
#endif
//Sizes below MEM_HEAP_HASH_SIZE go onto the exact-size list in heap_hash;
//anything else is handed to MemPagTaskFree.
				CMP			RAX, MEM_HEAP_HASH_SIZE
				JAE			@@50

#assert CMemUnused.size == CMemUsed.size
//The size field is already in place (same offset, asserted above), so no store is needed:
//				MOV			U64 CMemUnused.size[RSI], RAX

				MOV			RBX, U64 CHeapCtrl.heap_hash[RAX + RDX]
				MOV			U64 CMemUnused.next[RSI], RBX
				MOV			U64 CHeapCtrl.heap_hash[RAX + RDX], RSI
				JMP			@@55

@@50:			SUB			RSI, sizeof(CMemBlk)
				PUSH		RDX
				PUSH		RDX
				PUSH		RSI
				CALL		&MemPagTaskFree
				POP			RDX

@@55:			LOCK
				BTR			U32 CHeapCtrl.locked_flags[RDX], HClf_LOCKED
				POPFD
FREE_DONE:
				POP			RDI
				POP			RSI
				POP			RBP
				RET1		8

//************************************
_MSIZE::
//I64 MSize(U8 *src)
//Returns CMemUsed.size - CMemUsed.start for the chunk, or 0 for NULL.
				PUSH		RBP
				MOV			RBP, RSP
				MOV			RBX, U64 SF_ARG1[RBP]
				XOR			RAX, RAX
				TEST		RBX, RBX
				JZ			@@10
				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
				TEST		RAX, RAX
				JGE			@@05		//Aligned alloced chunks have neg size
				ADD			RBX, RAX
				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
@@05:			SUB			RAX, CMemUsed.start
@@10:			POP			RBP
				RET1		8

//************************************
_MSIZE2::
//I64 MSize2(U8 *src)
//Returns the raw CMemUsed.size field of the chunk, or 0 for NULL.
				PUSH		RBP
				MOV			RBP, RSP
				MOV			RBX, U64 SF_ARG1[RBP]
				XOR			RAX, RAX
				TEST		RBX, RBX
				JZ			@@10
				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
				TEST		RAX, RAX
				JGE			@@05		//Aligned alloced chunks have neg size
				ADD			RBX, RAX
@@05:			MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
@@10:			POP			RBP
				RET1		8

//************************************
_MHEAP_CTRL::
//CHeapCtrl *MHeapCtrl(U8 *src)
//Returns the CMemUsed.hc field of the chunk, or 0 for NULL.
				PUSH		RBP
				MOV			RBP, RSP
				MOV			RBX, U64 SF_ARG1[RBP]
				XOR			RAX, RAX
				TEST		RBX, RBX
				JZ			@@10
				MOV			RAX, U64 CMemUsed.size - CMemUsed.start[RBX]
				TEST		RAX, RAX
				JGE			@@05		//Aligned alloced chunks have neg size
				ADD			RBX, RAX
@@05:			MOV			RAX, U64 CMemUsed.hc - CMemUsed.start[RBX]
@@10:			POP			RBP
				RET1		8
}

_extern _FREE		U0			 Free(U8 *addr);								//Free MAlloc()ed memory chunk.
_extern _MSIZE		I64			 MSize(	U8 *src);								//Size of heap object.
_extern _MSIZE2		I64			 MSize2(	U8 *src);							//Internal size of heap object.
_extern _MHEAP_CTRL	CHeapCtrl	*MHeapCtrl(U8 *src);							//CHeapCtrl of object.
_extern _MALLOC		U8			*MAlloc(I64 size, CTask *mem_task=NULL);		//Alloc memory chunk.
//Accepts a CTask or CHeapCtrl. NULL allocs off current task's heap.

U8 *SysMAlloc(I64 size)
{//Alloc memory in System task's heap.
	return MAlloc(size, sys_task);
}

U8 *CAlloc(I64 size, CTask *mem_task=NULL)
{//Accepts a CTask or CHeapCtrl. NULL allocs off current task's heap.
//Same as MAlloc() but the returned chunk is set to zero.
	U8 *res = MAlloc(size, mem_task);

	MemSet(res, 0, size);

	return res;
}

U8 *SysCAlloc(I64 size)
{//Alloc and set to zero memory in System task's heap.
	return CAlloc(size, sys_task);
}

U8 *MAllocIdent(U8 *src, CTask *mem_task=NULL)
{//Accepts a CTask or CHeapCtrl. NULL allocs off current task's heap.
//Allocs a chunk of MSize(src) bytes and copies src into it. NULL src returns NULL.
	U8	*res;
	I64	 size;

	if (!src)
		return NULL;
	size = MSize(src);
	res  = MAlloc(size, mem_task);
	MemCopy(res, src, size);

	return res;
}

U8 *SysMAllocIdent(U8 *src)
{//Alloc in System task's heap, ident copy of heap node.
	return MAllocIdent(src, sys_task);
}

U8 *MAllocAligned(I64 size, I64 alignment, CTask *mem_task=NULL, I64 misalignment=0)
{//Only powers of two alignment. This is awful.
//Over-allocs by mask + sizeof(I64) + misalignment, rounds up to the alignment,
//then adds misalignment. The I64 just before the result holds the (negative)
//offset back to the real chunk; Free(), MSize(), MSize2() and MHeapCtrl()
//detect it as a negative size.
	I64  mask = alignment - 1;
	U8	*ptr  = MAlloc(size + mask + sizeof(I64) + misalignment, mem_task),
		*res  = ((ptr + sizeof(I64) + mask) & ~mask) + misalignment; //explicit grouping: align first, then offset

	res(I64 *)[-1] = ptr - res;
#assert offset(CMemUsed.size) == offset(CMemUsed.start) - sizeof(I64)

	return res;
}

U8 *CAllocAligned(I64 size, I64 alignment, CTask *mem_task=NULL, I64 misalignment=0)
{//Only powers of two alignment. This is awful.
//Same as MAllocAligned() but the first size bytes of the result are set to zero.
	U8 *res = MAllocAligned(size, alignment, mem_task, misalignment);

	MemSet(res, 0, size);

	return res;
}

U8 *ReAlloc(U8 *ptr, U64 new_size, CTask *mem_task=NULL)
{//Resize previously MAlloc'ed chunk. If new_size is zero then act as Free.
//If pointer is NULL then act as MAlloc. If both are NULL/0 it does nothing (Free(NULL)).
//Useless for changing chunk sizes smaller than 8 bytes because MAlloc allocs 8 bytes at a time.
//Always allocs a new chunk, copies the smaller of the old and new sizes, and frees the old chunk.
	U8 *res;

	if (!new_size)
	{
		Free(ptr); //we can free NULL
		return NULL;
	}

	res = MAlloc(new_size, mem_task);
	if (!ptr)
		return res;

	MemCopy(res, ptr, MinI64(MSize(ptr), new_size));
	Free(ptr);

	return res;
}

U8 *SysReAlloc(U8 *ptr, I64 new_size)
{//Realloc in System task's heap.
	return ReAlloc(ptr, new_size, sys_task);
}

U8 *StrNew(U8 *buf, CTask *mem_task=NULL)
{//Accepts a CTask or CHeapCtrl. NULL allocs off current task's heap.
//Allocs a copy of the string buf. NULL buf yields a newly alloced empty string.
	U8	*res;
	I64	 size;

	if (buf)
	{
		size = StrLen(buf) + 1; //include terminating zero
		res  = MAlloc(size, mem_task);
		MemCopy(res, buf, size);
	}
	else
	{
		res  = MAlloc(1, mem_task);
		*res = 0;
	}

	return res;
}

U8 *SysStrNew(U8 *buf)
{//Alloc copy of string in System task's heap.
	return StrNew(buf, sys_task);
}