ZealOS/src/Kernel/StrA.ZC

762 lines
15 KiB
HolyC
Raw Normal View History

U8 *StrPrintHex(U8 *dst, I64 num, I64 width)
{//Store the low width hex digits of num at dst, upper case, zero padded.
//Returns a pointer just past the last digit stored. No terminator is
//written; the caller stores one through the returned pointer if wanted.
//A negative width is treated as zero.
	U8 *res;

	if (width < 0)
		width = 0; //Otherwise the loop below would run backwards through memory.
	res = dst + width;
	dst = res;
	while (width--)
	{//Fill right to left, least significant nibble first.
		*--dst = "0123456789ABCDEF"(U8 *)[num & 15];
		num >>= 4;
	}
	return res;
}
U0 PutHex(I64 num, I64 width)
{//Print the low width hex digits of num. width is clamped to 0..16.
	U8 buf[17]; //Up to 16 digits plus the terminator.

	if (width > 16)
		width = 16;
	else if (width < 0)
		width = 0; //Never hand StrPrintHex() a negative digit count.
	*StrPrintHex(buf, num, width) = 0; //Terminate right after the last digit.
	"%s", buf;
}
asm {
// IN: RAX=NUM TO PRINT
// Prints RAX as 16 hex digits by calling PutHex(RAX, 16).
// PUSH_C_REGS/POP_C_REGS presumably save and restore the registers a HolyC call may clobber -- confirm against the macros.
PUT_HEX_U64::
PUSH_C_REGS
PUSH 16 // width argument
PUSH RAX // num argument
CALL &PutHex
POP_C_REGS
RET
2020-02-15 20:01:48 +00:00
// IN: RAX=NUM TO PRINT
// Prints the low 32 bits of RAX as 8 hex digits by calling PutHex(RAX, 8).
PUT_HEX_U32::
PUSH_C_REGS
PUSH 8 // width argument
PUSH RAX // num argument
CALL &PutHex
POP_C_REGS
RET
2020-02-15 20:01:48 +00:00
// IN: RAX=NUM TO PRINT
// Prints the low 16 bits of RAX as 4 hex digits by calling PutHex(RAX, 4).
PUT_HEX_U16::
PUSH_C_REGS
PUSH 4 // width argument
PUSH RAX // num argument
CALL &PutHex
POP_C_REGS
RET
2020-02-15 20:01:48 +00:00
// IN: RAX=NUM TO PRINT
// Prints the low 8 bits of RAX as 2 hex digits by calling PutHex(RAX, 2).
PUT_HEX_U8::
PUSH_C_REGS
PUSH 2 // width argument
PUSH RAX // num argument
CALL &PutHex
POP_C_REGS
RET
2020-02-15 20:01:48 +00:00
// Asm-callable wrapper: passes RAX as the single argument to PutChars().
PUT_CHARS::
// IN: RAX=Char
PUSH_C_REGS
PUSH RAX // the only argument
CALL &PutChars
POP_C_REGS
RET
2020-02-15 20:01:48 +00:00
// Asm-callable wrapper: passes RSI as the single argument to PutS().
PUT_STR::
// IN: RSI=String
PUSH_C_REGS
PUSH RSI // the only argument
CALL &PutS
POP_C_REGS
RET
// StrCopy(dst, src): copy the zero-terminated string at src to dst, terminator included.
// A NULL dst does nothing. A NULL src stores just a terminator at dst.
// Bytes are moved one at a time from low to high addresses.
_STRCOPY::
PUSH RBP
MOV RBP, RSP
PUSH RSI
PUSH RDI
MOV RDI, U64 SF_ARG1[RBP] // RDI = dst
TEST RDI, RDI
JZ @@15 // NULL dst: nothing to do
MOV RSI, U64 SF_ARG2[RBP] // RSI = src
TEST RSI, RSI
JNZ @@05
XOR RAX, RAX // NULL src: store a lone zero byte
JMP @@10
@@05: LODSB // AL = *src++
@@10: STOSB // *dst++ = AL
TEST AL, AL
JNZ @@05 // loop until the terminator has been stored
@@15: POP RDI
POP RSI
POP RBP
RET1 16 // the 16 presumably discards the two 8-byte stack arguments -- confirm
// StrCompare(st1, st2): compare two zero-terminated strings byte by byte (unsigned).
// OUT: RAX = 0 if equal, -1 if st1 sorts before st2, 1 if st1 sorts after st2.
// Neither pointer is checked for NULL.
_STRCOMPARE::
PUSH RBP
MOV RBP, RSP
PUSH RSI
PUSH RDI
MOV RSI, U64 SF_ARG2[RBP] // RSI = st2
MOV RDI, U64 SF_ARG1[RBP] // RDI = st1
@@05: LODSB // AL = next st2 byte
TEST AL, AL
JZ @@20 // st2 ended
SCASB // flags = AL - next st1 byte, RDI advances
JE @@05
JA @@15 // st2 byte is larger, so st1 sorts first
@@10: MOV RAX, 1
JMP @@25
@@15: MOV RAX, -1
JMP @@25
@@20: SCASB // AL is 0 here: is st1 at its terminator too?
JNE @@10 // st1 is longer
XOR RAX, RAX
@@25: POP RDI
POP RSI
POP RBP
RET1 16 // the 16 presumably discards the two 8-byte stack arguments -- confirm
2020-02-15 20:01:48 +00:00
// IN:  AL=char
// OUT: AL=char with 'a'..'z' folded to 'A'..'Z'; any other value is returned unchanged.
TO_UPPER::
CMP AL, 'a'
JB @@05
CMP AL, 'z'
JA @@05
ADD AL, 'A' - 'a'
@@05: RET
// StrICompare(st1, st2): compare two zero-terminated strings, folding 'a'..'z' to upper case
// in both before each byte is compared (unsigned).
// OUT: RAX = 0 if equal, -1 if st1 sorts before st2, 1 if st1 sorts after st2.
// BL is used as scratch and is not saved by this routine.
_STRICOMPARE::
PUSH RBP
MOV RBP, RSP
PUSH RSI
PUSH RDI
MOV RSI, U64 SF_ARG2[RBP] // RSI = st2
MOV RDI, U64 SF_ARG1[RBP] // RDI = st1
@@05: LODSB // AL = next st2 byte
TEST AL, AL
JZ @@30 // st2 ended
CMP AL, 'a' // fold AL to upper case
JB @@10
CMP AL, 'z'
JA @@10
ADD AL, 'A' - 'a'
@@10: MOV BL, U8 [RDI] // BL = next st1 byte
INC RDI
CMP BL, 'a' // fold BL to upper case
JB @@15
CMP BL, 'z'
JA @@15
ADD BL, 'A' - 'a'
@@15: CMP AL, BL
JE @@05
JA @@25 // st2 byte is larger, so st1 sorts first
@@20: MOV RAX, 1
JMP @@35
@@25: MOV RAX, -1
JMP @@35
@@30: MOV BL, U8 [RDI] // st2 ended: is st1 at its terminator too?
TEST BL, BL
JNE @@20 // st1 is longer
XOR RAX, RAX
@@35: POP RDI
POP RSI
POP RBP
RET1 16 // the 16 presumably discards the two 8-byte stack arguments -- confirm
// StrNCompare(st1, st2, n): compare at most n bytes of two zero-terminated strings (unsigned).
// OUT: RAX = 0 if the first n bytes are equal or both strings end together,
//      -1 if st1 sorts before st2, 1 if st1 sorts after st2.
// RCX counts the remaining bytes; BL is used as scratch. Neither is saved by this routine.
_STRNCOMPARE::
PUSH RBP
MOV RBP, RSP
PUSH RSI
PUSH RDI
MOV RCX, U64 SF_ARG3[RBP] // RCX = n
MOV RSI, U64 SF_ARG2[RBP] // RSI = st2
MOV RDI, U64 SF_ARG1[RBP] // RDI = st1
@@05: TEST RCX, RCX
JZ @@25 // n bytes matched
DEC RCX
LODSB // AL = next st2 byte
TEST AL, AL
JZ @@20 // st2 ended
SCASB // flags = AL - next st1 byte, RDI advances
JE @@05
JA @@15 // st2 byte is larger, so st1 sorts first
@@10: MOV RAX, 1
JMP @@30
@@15: MOV RAX, -1
JMP @@30
@@20: MOV BL, U8 [RDI] // st2 ended: is st1 at its terminator too?
TEST BL, BL
JNE @@10 // st1 is longer
@@25: XOR RAX, RAX
@@30: POP RDI
POP RSI
POP RBP
RET1 24 // the 24 presumably discards the three 8-byte stack arguments -- confirm
// StrNICompare(st1, st2, n): compare at most n bytes of two zero-terminated strings,
// folding 'a'..'z' to upper case in both before each byte is compared (unsigned).
// OUT: RAX = 0 if the first n bytes are equal or both strings end together,
//      -1 if st1 sorts before st2, 1 if st1 sorts after st2.
// RCX counts the remaining bytes; BL is used as scratch. Neither is saved by this routine.
_STRNICOMPARE::
PUSH RBP
MOV RBP, RSP
PUSH RSI
PUSH RDI
MOV RCX, U64 SF_ARG3[RBP] // RCX = n
MOV RSI, U64 SF_ARG2[RBP] // RSI = st2
MOV RDI, U64 SF_ARG1[RBP] // RDI = st1
@@05: TEST RCX, RCX
JZ @@35 // n bytes matched
DEC RCX
LODSB // AL = next st2 byte
TEST AL, AL
JZ @@30 // st2 ended
CMP AL, 'a' // fold AL to upper case
JB @@10
CMP AL, 'z'
JA @@10
ADD AL, 'A' - 'a'
@@10: MOV BL, U8 [RDI] // BL = next st1 byte
INC RDI
CMP BL, 'a' // fold BL to upper case
JB @@15
CMP BL, 'z'
JA @@15
ADD BL, 'A' - 'a'
@@15: CMP AL, BL
JE @@05
JA @@25 // st2 byte is larger, so st1 sorts first
@@20: MOV RAX, 1
JMP @@40
@@25: MOV RAX, -1
JMP @@40
@@30: SCASB // AL is 0 here: is st1 at its terminator too?
JNE @@20 // st1 is longer
@@35: XOR RAX, RAX
@@40: POP RDI
POP RSI
POP RBP
RET1 24 // the 24 presumably discards the three 8-byte stack arguments -- confirm
2020-02-15 20:01:48 +00:00
// StrMatch(needle, haystack_str): find the first occurrence of needle in haystack_str.
// OUT: RAX = pointer into haystack_str where the match starts, or NULL when there
//      is no match or either pointer is NULL. An empty needle matches at
//      haystack_str itself.
// RCX and DL are used as scratch and are not saved by this routine.
_STRMATCH::
	PUSH	RBP
	MOV	RBP, RSP
	PUSH	RSI
	PUSH	RDI
	MOV	RSI, U64 SF_ARG2[RBP]	// RSI = haystack_str
	TEST	RSI, RSI
	JZ	@@25
	MOV	RDI, U64 SF_ARG1[RBP]	// RDI = needle
	TEST	RDI, RDI
	JZ	@@25
	MOV	DL, U8 [RDI]		// DL = first needle char
	TEST	DL, DL
	JZ	@@20			// empty needle: match at the start
	JMP	@@10
@@05:	INC	RSI			// candidate failed: resume one past it
@@10:	LODSB				// scan for the first needle char
	TEST	AL, AL
	JZ	@@25			// haystack exhausted
	CMP	AL, DL
	JNE	@@10
	DEC	RSI			// RSI = candidate start
	MOV	RCX, 1
@@15:	MOV	AL, U8 [RDI + RCX]	// compare the rest of the needle
	TEST	AL, AL
	JZ	@@20			// whole needle matched
	CMP	AL, U8 [RSI + RCX]
	JNE	@@05
	INC	RCX
	JMP	@@15
@@20:	MOV	RAX, RSI
	JMP	@@30
@@25:	XOR	RAX, RAX
@@30:	POP	RDI
	POP	RSI
	POP	RBP
	RET1	16
2020-02-15 20:01:48 +00:00
// StrIMatch(needle, haystack_str): find the first occurrence of needle in
// haystack_str, treating 'a'..'z' and 'A'..'Z' as equal (both sides go through TO_UPPER).
// OUT: RAX = pointer into haystack_str where the match starts, or NULL when there
//      is no match or either pointer is NULL. An empty needle matches at
//      haystack_str itself.
// RCX, DL and BL are used as scratch and are not saved by this routine.
_STRIMATCH::
	PUSH	RBP
	MOV	RBP, RSP
	PUSH	RSI
	PUSH	RDI
	MOV	RSI, U64 SF_ARG2[RBP]	// RSI = haystack_str
	TEST	RSI, RSI
	JZ	@@25
	MOV	RDI, U64 SF_ARG1[RBP]	// RDI = needle
	TEST	RDI, RDI
	JZ	@@25
	MOV	AL, U8 [RDI]
	CALL	TO_UPPER
	MOV	DL, AL			// DL = first needle char, upper case
	TEST	DL, DL
	JZ	@@20			// empty needle: match at the start
	JMP	@@10
@@05:	INC	RSI			// candidate failed: resume one past it
@@10:	LODSB				// scan for the first needle char
	CALL	TO_UPPER
	TEST	AL, AL
	JZ	@@25			// haystack exhausted
	CMP	AL, DL
	JNE	@@10
	DEC	RSI			// RSI = candidate start
	MOV	RCX, 1
@@15:	MOV	AL, U8 [RDI + RCX]	// compare the rest of the needle
	CALL	TO_UPPER
	TEST	AL, AL
	JZ	@@20			// whole needle matched
	MOV	BL, U8 [RSI + RCX]
	XCHG	AL, BL			// AL = haystack char, BL = needle char (upper case)
	CALL	TO_UPPER
	CMP	AL, BL
	JNE	@@05
	INC	RCX
	JMP	@@15
@@20:	MOV	RAX, RSI
	JMP	@@30
@@25:	XOR	RAX, RAX
@@30:	POP	RDI
	POP	RSI
	POP	RBP
	RET1	16
2020-02-15 20:01:48 +00:00
}
//HolyC prototypes bound to the assembly routines in the asm block above.
//The four compare routines return 0 when equal, -1 when st1 sorts before st2 and 1 when it sorts after (unsigned bytes).
//The two match routines return a pointer into haystack_str, or NULL when nothing matches or a pointer is NULL.
_extern _STRCOMPARE I64 StrCompare(U8 *st1, U8 *st2); //Compare two strings.
_extern _STRICOMPARE I64 StrICompare(U8 *st1, U8 *st2); //Compare two strings, ignoring case.
_extern _STRNCOMPARE I64 StrNCompare(U8 *st1, U8 *st2, I64 n); //Compare at most N bytes in two strings.
_extern _STRNICOMPARE I64 StrNICompare(U8 *st1, U8 *st2, I64 n); //Compare at most N bytes in two strings, ignoring case.
_extern _STRMATCH U8 *StrMatch(U8 *needle, U8 *haystack_str);//Scan for string in string.
_extern _STRIMATCH U8 *StrIMatch(U8 *needle, U8 *haystack_str);//Scan for string in string, ignoring case.
_extern _STRCOPY U0 StrCopy(U8 *dst, U8 *src); //Copy string, terminator included. NULL dst is ignored; NULL src stores an empty string.
2020-02-15 20:01:48 +00:00
//These bitmaps go to 0-511 so that $LK,"Lex",A="MN:Lex"$() can use them with $LK,"Token Codes",A="MN:TK_EOF"$.
//Each table is 16 U32s = 512 bits, tested with Bt(table, code): bit N set means code N is in the class.
//U32 [0] covers codes 0-31, [1] 32-63, [2] 64-95, [3] 96-127, [4]-[7] 128-255, [8]-[15] 256-511.
U32
//'@', 'A'-'Z', '_', 'a'-'z' and codes 128-255.
char_bmp_alpha[16] =
{0x0000000, 0x00000000, 0x87FFFFFF, 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_alpha plus '0'-'9'.
char_bmp_alpha_numeric[16] =
{0x0000000, 0x03FF0000, 0x87FFFFFF, 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_alpha_numeric without '@'.
char_bmp_alpha_numeric_no_at[16] =
{0x0000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_alpha_numeric_no_at plus the apostrophe (code 39).
char_bmp_word[16] =
{0x0000000, 0x03FF0080, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//Codes 32-126 except " * + / : ; < = > ? \ and |, plus codes 128-255.
char_bmp_filename[16] =
{0x0000000, 0x03FF73FB, 0xEFFFFFFF, 0x6FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//'0'-'9'.
char_bmp_dec_numeric[16] =
{0x0000000, 0x03FF0000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//'0'-'9', 'A'-'F' and 'a'-'f'.
char_bmp_hex_numeric[16] =
{0x0000000, 0x03FF0000, 0x7E, 0x7E, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//Tab (9), line feed (10), carriage return (13), code 31 and space (32).
//NOTE(review): code 31 is presumably the shifted-space char -- confirm against the CH_ constants.
char_bmp_white_space[16] =
{0x80002600, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_white_space without line feed and carriage return.
char_bmp_non_eol_white_space[16] =
{0x80000200, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//Codes 0, 5, 10 and 13; going by the table name, code 5 is the cursor char.
char_bmp_zero_cr_nl_cursor[16] =
{0x00002421, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//The table above plus tab (9).
char_bmp_zero_tab_cr_nl_cursor[16] =
{0x00002621, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//The table above plus '$' (36).
char_bmp_zero_tab_cr_nl_cursor_dollar[16] =
{0x00002621, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_printable without '%' (37).
char_bmp_macro[16] =
{0x80002600, 0xFFFFFFDF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//Tab, line feed, carriage return, code 31, codes 32-126 and codes 128-255.
char_bmp_printable[16] =
{0x80002600, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//Code 31, codes 32-126 and codes 128-255 (no tab or end-of-line chars).
char_bmp_displayable[16] =
{0x80000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
char_bmp_safe_dollar[16] =
{0x80000000, 0xFFFFFFEF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0, 0, 0, 0, 0},
//char_bmp_safe_dollar (just above): same as char_bmp_displayable but no dollar sign.
//Every code 0-511 except 0, line feed (10) and carriage return (13).
char_bmp_non_eol[16] =
{0xFFFFDBFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
U8 *ListSub(I64 sub, U8 *list)
{//Return a pointer to entry number sub of a zero-separated list.
//NULL if list is NULL, sub is negative or the list has too few entries.
//Entries that begin with '@' are aliases and are not counted.
//Linear scan. Keep an array of U8 pointers if speed matters.
	U8 *entry = list;

	if (!entry)
		return NULL;
	while (sub > 0 && *entry)
	{
		while (*entry++); //Step over the cur entry and its trailing zero.
		if (*entry == '@')
			entry++; //Alias: skip the marker, leave the count alone.
		else
			sub--;
	}
	if (!sub && *entry)
		return entry;
	return NULL;
}
I64 ListMatch(U8 *needle, U8 *haystack_list, I64 flags=0)
{//Return the entry number of needle in a zero-separated list.
// -2 if Ambiguous
// -1 if not found
// Not efficient. Use hash tables for efficiency.
//An exact match returns at once. Otherwise needle may match as a prefix,
//unless LMF_EXACT is set; two prefix matches are ambiguous.
//LMF_IGNORE_CASE compares through ToUpper().
	I64 sub = 0, res = -1;
	U8 *ptr;

	if (!haystack_list)
		return -1;
	while (*haystack_list)
	{
		if (*haystack_list == '@')
		{//Alias entry: it shares the number of the entry before it.
			sub--;
			haystack_list++;
		}
		ptr = needle;
		if (flags & LMF_IGNORE_CASE)
			while (*ptr && ToUpper(*ptr) == ToUpper(*haystack_list))
			{
				ptr++;
				haystack_list++;
			}
		else
			while (*ptr && *ptr == *haystack_list)
			{
				ptr++;
				haystack_list++;
			}
		if (!*ptr)
		{//The whole needle was consumed.
			if (!*haystack_list)
				return sub; //The entry ended too: exact match.
			if (res != -1)
				res = -2; //A second prefix match: ambiguous unless a later entry matches exactly.
			else if (!(flags & LMF_EXACT))
				res = sub; //First prefix match.
		}
		while (*haystack_list) //Advance to end of cur entry.
			haystack_list++;
		haystack_list++; //Skip trailing zero.
		sub++;
	}
	return res;
}
I64 StrOcc(U8 *src, I64 ch)
{//Return how many times ch appears in src. A NULL src counts as zero.
	I64 count = 0;

	if (src)
		for (; *src; src++)
			if (*src == ch)
				count++;
	return count;
}
I64 Spaces2Tabs(U8 *dst, U8 *src)
{//Copy src to dst, turning runs of spaces that reach a 4-column tab stop into tabs.
//Returns the number of chars removed. dst is always terminated.
//StrUtil() calls this with the same buf for dst and src.
	U8 *run_end;
	I64 saved = 0, run_len, to_stop, col = 0;

	if (!*src)
	{
		*dst = 0;
		return saved;
	}
	while (TRUE)
	{
		//Measure the run of spaces that starts at src.
		for (run_end = src; *run_end == CH_SPACE; run_end++);
		run_len = run_end - src;

		//Emit one char for every stretch of the run that reaches a tab stop.
		while (col + run_len >= 4)
		{
			to_stop = 4 - col;
			if (to_stop == 1)
				*dst++ = CH_SPACE; //One column to go: a space does the same job.
			else
			{
				*dst++ = '\t';
				saved += to_stop - 1;
			}
			run_len -= to_stop;
			col = 0;
		}

		if (*run_end == '\t')
		{//Spaces left over in front of a tab are absorbed by the tab.
			if (run_len == 1 && col == 3)
				*dst++ = CH_SPACE;
			else
				saved += run_len;
			*dst++ = '\t';
			col = 0;
		}
		else
		{//Left-over spaces stay, followed by the char that ended the run.
			for (; run_len; run_len--)
			{
				*dst++ = CH_SPACE;
				if (++col == 4)
					col = 0;
			}
			if (!*run_end)
				break;
			*dst++ = *run_end;
			if (++col == 4)
				col = 0;
		}
		src = run_end + 1;
	}
	*dst = 0;
	return saved;
}
U8 *StrUtil(U8 *_src, I64 flags)
{//Modifies in place. See $LK,"flags",A="MN:SUF_REM_SPACES"$ for all the options.
//Returns _src. The passes run in this order: leading/whitespace/ctrl-char
//filtering, trailing trim, to-upper, to-lower, safe-dollar, spaces-to-tabs.
	U8 *src = _src, *dst = _src;
	I64 ch;

	if (flags & SUF_REM_LEADING)
		while (Bt(char_bmp_white_space, *src))
			src++;
	while (ch = *src++)
		if (Bt(char_bmp_white_space, ch))
		{
			if (!(flags & SUF_REM_SPACES))
			{
				if (flags & SUF_SINGLE_SPACE)
				{//Collapse the whole whitespace run to one space.
					*dst++ = CH_SPACE;
					while ((ch = *src++) && Bt(char_bmp_white_space, ch));
					src--; //Reprocess the char that ended the run.
				}
				else
					*dst++ = ch;
			}
		}
		else if (!(flags & SUF_REM_CTRL_CHARS) || ch >= CH_SPACE)
			*dst++ = ch;
	*dst = 0;

	//dst is at the terminator. Test the char before it, so that a str
	//made only of whitespace is trimmed all the way to empty.
	if (flags & SUF_REM_TRAILING)
		while (dst != _src && Bt(char_bmp_white_space, *(dst - 1)))
			*--dst = 0;

	if (flags & SUF_TO_UPPER)
		for (dst = _src; *dst; dst++)
		{
			ch = *dst;
			if ('a' <= ch <= 'z') //HolyC chained compare: a range test.
				*dst = ch - 0x20;
		}
	if (flags & SUF_TO_LOWER)
		for (dst = _src; *dst; dst++)
		{
			ch = *dst;
			if ('A' <= ch <= 'Z')
				*dst = ch + 0x20;
		}
	if (flags & SUF_SAFE_DOLLAR)
		for (dst = _src; *dst; dst++)
			if (!Bt(char_bmp_safe_dollar, *dst))
				*dst = '.';
	if (flags & SUF_S2T)
		Spaces2Tabs(_src, _src);
	return _src;
}
U8 *StrFirstOcc(U8 *src, U8 *marker)
{//Return a pointer to the first char of src that is in the marker set, else NULL.
	for (; *src; src++)
		if (StrOcc(marker, *src))
			return src;
	return NULL;
}
U8 *StrFirstRemove(U8 *src, U8 *marker, U8 *dst=NULL)
{//Cut the first segment (up to the first marker-set char) off the front of src.
//The segment goes to dst when dst is not NULL. The marker char is dropped and
//the rest of src moves to the front; with no marker char src becomes empty.
//Returns dst.
	I64 ch;
	U8 *scan = src, *out = dst;

	for (ch = *scan; ch && !StrOcc(marker, ch); ch = *++scan)
		if (out)
			*out++ = ch;
	if (out)
		*out = 0;
	if (ch)
		StrCopy(src, scan + 1); //Close the gap over the segment and its marker.
	else
		*src = 0;
	return dst;
}
U8 *StrLastOcc(U8 *src, U8 *marker)
{//Return a pointer to the last char of src that is in the marker set, else NULL.
	U8 *last = NULL;

	for (; *src; src++)
		if (StrOcc(marker, *src))
			last = src;
	return last;
}
U8 *StrLastRemove(U8 *src, U8 *marker, U8 *dst=NULL)
{//Cut the last segment (after the last marker-set char) off the end of src.
//The segment goes to dst when dst is not NULL. src is ended at the marker
//char; with no marker char the whole of src is the segment and src becomes
//empty. Returns dst.
	U8 *cut = StrLastOcc(src, marker), *tail;

	if (cut)
		tail = cut + 1;
	else
	{
		tail = src;
		cut = src;
	}
	if (dst)
		StrCopy(dst, tail);
	*cut = 0;
	return dst;
}
U8 *StrFind(U8 *needle, U8 *haystack_str, I64 flags=0)
{//Find needle in haystack_str with options. Returns a pointer to the match or NULL.
//SFF_IGNORE_CASE searches with StrIMatch() instead of StrMatch().
//SFF_WHOLE_LABELS_BEFORE rejects a match preceded by an alphanumeric char,
//except at the very start of haystack_str.
//SFF_WHOLE_LABELS_AFTER rejects a match followed by an alphanumeric char.
	U8  *origin = haystack_str;
	I64  needle_len = StrLen(needle);
	Bool retry = TRUE;

	while (retry)
	{
		retry = FALSE;
		if (flags & SFF_IGNORE_CASE)
			haystack_str = StrIMatch(needle, haystack_str);
		else
			haystack_str = StrMatch(needle, haystack_str);

		if (haystack_str && (flags & SFF_WHOLE_LABELS_BEFORE) && haystack_str != origin &&
			Bt(char_bmp_alpha_numeric, *(haystack_str - 1)))
		{//Rejected: search again from the next char, if there is one.
			haystack_str++;
			if (*haystack_str)
				retry = TRUE;
			else
				haystack_str = NULL;
		}
		if (haystack_str && (flags & SFF_WHOLE_LABELS_AFTER) &&
			Bt(char_bmp_alpha_numeric, *(haystack_str + needle_len)))
		{
			haystack_str++;
			if (*haystack_str)
				retry = TRUE;
			else
				haystack_str = NULL;
		}
	}
	return haystack_str;
}
U8 *StrReplace(U8 *str, U8 *old, U8 *new, I64 sff_flags=NONE, Bool free_str=FALSE)
{//Replace all instances of old with new in str. New MAlloc()ed string. free_str aids in chain replacement.
//str is never modified. It is Free()d before returning when free_str is set.
//An empty old returns a copy of new. Matches are located with StrFind()
//using sff_flags, each search starting right after the previous match.
	U8 *res, *dst, *scan, *hit;
	I64 old_len, new_len, count = 0;

	if (!*old)
		res = StrNew(new);
	else if (!StrCompare(old, new))
		res = StrNew(str); //Nothing would change.
	else
	{
		old_len = StrLen(old);

		//Pass 1: count the matches so the result can be sized exactly.
		scan = str;
		while (hit = StrFind(old, scan, sff_flags))
		{
			count++;
			scan = hit + old_len;
		}

		if (!count)
			res = StrNew(str);
		else
		{
			new_len = StrLen(new);
			res = MAlloc(StrLen(str) + count * (new_len - old_len) + 1);

			//Pass 2: copy the text between matches, putting new in place of each match.
			dst = res;
			scan = str;
			while (hit = StrFind(old, scan, sff_flags))
			{
				while (scan < hit)
					*dst++ = *scan++;
				StrCopy(dst, new);
				dst += new_len;
				scan = hit + old_len;
			}
			StrCopy(dst, scan); //Tail after the last match, terminator included.
		}
	}
	if (free_str)
		Free(str);
	return res;
}
Bool WildMatch(U8 *test_str, U8 *wild_str)
{//Wildcard match with '*' and '?'.
//'?' stands for any one char. After a mismatch the scan goes back once to
//the most recent '*' and carries on from the test_str char after the one
//that was cur when that '*' was read.
	I64 ch1, ch2;
	U8 *retry_test = NULL, *retry_wild = NULL;

	while (TRUE)
	{
		ch1 = *test_str++;
		if (!ch1)
		{//test_str is used up: wild_str must be used up too, or be at a '*'.
			if (!*wild_str || *wild_str == '*')
				return TRUE;
			return FALSE;
		}
		ch2 = *wild_str++;
		if (!ch2)
			return FALSE; //Pattern ended with text still left over.
		if (ch2 == '*')
		{
			retry_wild = wild_str - 1;
			retry_test = test_str;
			ch2 = *wild_str++;
			if (!ch2)
				return TRUE; //A trailing '*' matches whatever is left.
			while (ch2 != ch1)
			{//Skip ahead to the char that follows the '*' in the pattern.
				ch1 = *test_str++;
				if (!ch1)
					return FALSE;
			}
		}
		else if (ch2 != '?' && ch1 != ch2)
		{
			if (!retry_wild)
				return FALSE;
			wild_str = retry_wild;
			test_str = retry_test;
			retry_wild = NULL;
			retry_test = NULL;
		}
	}
}