Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

builtin: Add is_hex(), is_int(), is_bin(), and is_oct() by strings - Feature #20540

Merged
merged 5 commits into from Jan 16, 2024
Merged

builtin: Add is_hex(), is_int(), is_bin(), and is_oct() by strings - Feature #20540

merged 5 commits into from Jan 16, 2024

Conversation

viniciusfdasilva
Copy link
Contributor

@viniciusfdasilva viniciusfdasilva commented Jan 15, 2024

Code

// is_oct returns `true` if the string is an octal literal, i.e. an optional
// `+` or `-` sign, followed by the prefix `0o`, followed by one or more
// digits in the range `0`..`7`. It returns `false` for everything else,
// including the empty string and a prefix with no digits after it.
@[direct_array_access]
pub fn (str string) is_oct() bool {
	// Bounds checks are disabled by `direct_array_access`, so every index
	// used below must be proven to be smaller than `str.len` first.
	if str.len == 0 {
		return false
	}
	mut i := 0
	if str[i] == `-` || str[i] == `+` {
		i++
	}
	// After the optional sign there must be room for `0o` and at least one digit.
	if str.len - i < 3 {
		return false
	}
	if str[i] != `0` || str[i + 1] != `o` {
		return false
	}
	i += 2
	for i < str.len {
		if str[i] < `0` || str[i] > `7` {
			return false
		}
		i++
	}
	return true
}

// is_bin returns `true` if the string is a binary literal, i.e. an optional
// `+` or `-` sign, followed by the prefix `0b`, followed by one or more
// digits that are either `0` or `1`. It returns `false` for everything else,
// including the empty string and a prefix with no digits after it.
@[direct_array_access]
pub fn (str string) is_bin() bool {
	// Bounds checks are disabled by `direct_array_access`, so every index
	// used below must be proven to be smaller than `str.len` first.
	if str.len == 0 {
		return false
	}
	mut i := 0
	if str[i] == `-` || str[i] == `+` {
		i++
	}
	// After the optional sign there must be room for `0b` and at least one digit.
	if str.len - i < 3 {
		return false
	}
	if str[i] != `0` || str[i + 1] != `b` {
		return false
	}
	i += 2
	for i < str.len {
		if str[i] < `0` || str[i] > `1` {
			return false
		}
		i++
	}
	return true
}

// is_hex returns `true` if the string is a hexadecimal literal, i.e. an
// optional `+` or `-` sign, followed by the prefix `0x`, followed by one or
// more hexadecimal digits (`0`..`9`, `a`..`f` or `A`..`F`). It returns
// `false` for everything else, including the empty string and a prefix with
// no digits after it.
@[direct_array_access]
pub fn (str string) is_hex() bool {
	// Bounds checks are disabled by `direct_array_access`, so every index
	// used below must be proven to be smaller than `str.len` first.
	if str.len == 0 {
		return false
	}
	mut i := 0
	if str[i] == `-` || str[i] == `+` {
		i++
	}
	// After the optional sign there must be room for `0x` and at least one digit.
	if str.len - i < 3 {
		return false
	}
	if str[i] != `0` || str[i + 1] != `x` {
		return false
	}
	i += 2
	for i < str.len {
		c := str[i]
		is_hex_digit := (c >= `0` && c <= `9`) || (c >= `a` && c <= `f`)
			|| (c >= `A` && c <= `F`)
		if !is_hex_digit {
			return false
		}
		i++
	}
	return true
}

// is_int returns `true` if the string is a decimal integer, i.e. an optional
// `+` or `-` sign followed by one or more digits `0`..`9`. It returns `false`
// for everything else, including the empty string and a sign with no digits.
@[direct_array_access]
pub fn (str string) is_int() bool {
	// Bounds checks are disabled by `direct_array_access`, so `str[0]` must
	// not be read from an empty string.
	if str.len == 0 {
		return false
	}
	mut i := 0
	if str[i] == `-` || str[i] == `+` {
		i++
		// A sign on its own is not a number.
		if i == str.len {
			return false
		}
	}
	for i < str.len {
		if !str[i].is_digit() {
			return false
		}
		i++
	}
	return true
}

Test code

// vlib/builtin/string_test.v file
// test_is_bin checks that `is_bin` accepts only optionally signed `0b`
// literals made of the digits `0` and `1`, and rejects everything else.
fn test_is_bin() {
	// well-formed binary literals, with and without a sign
	assert '0b1'.is_bin() == true
	assert '0b0'.is_bin() == true
	assert '-0b1'.is_bin() == true
	assert '-0b0'.is_bin() == true
	assert '-0b1101'.is_bin() == true
	assert '-0b0101'.is_bin() == true
	assert '+0b1'.is_bin() == true
	assert '+0b1001'.is_bin() == true

	// empty or truncated input
	assert ''.is_bin() == false
	assert '+'.is_bin() == false
	assert '-'.is_bin() == false
	assert '0'.is_bin() == false
	assert '-0'.is_bin() == false
	assert '0b'.is_bin() == false
	assert '-0b'.is_bin() == false
	assert '+0b'.is_bin() == false

	// digits outside of the binary range
	assert '0b2'.is_bin() == false
	assert '0b102'.is_bin() == false

	// other number formats and plain text
	assert '-324'.is_bin() == false
	assert '0x1'.is_bin() == false
	assert '0x1A'.is_bin() == false
	assert '-0x1'.is_bin() == false
	assert '-0x1A'.is_bin() == false
	assert '0x'.is_bin() == false
	assert '0xFF'.is_bin() == false
	assert '0xG'.is_bin() == false
	assert '-0xFF'.is_bin() == false
	assert '0.34'.is_bin() == false
	assert '0o23'.is_bin() == false
	assert 'vlang'.is_bin() == false
}

// test_is_oct checks that `is_oct` accepts only optionally signed `0o`
// literals made of the digits `0`..`7`, and rejects everything else.
fn test_is_oct() {
	// single digit literals, with and without a sign
	assert '0o0'.is_oct() == true
	assert '0o1'.is_oct() == true
	assert '0o2'.is_oct() == true
	assert '-0o0'.is_oct() == true
	assert '-0o1'.is_oct() == true
	assert '-0o2'.is_oct() == true
	assert '+0o7'.is_oct() == true

	// multi digit literals
	assert '0o04'.is_oct() == true
	assert '0o16'.is_oct() == true
	assert '0o23'.is_oct() == true
	assert '-0o05'.is_oct() == true
	assert '-0o13'.is_oct() == true
	assert '-0o22'.is_oct() == true
	assert '+0o17'.is_oct() == true

	// empty or truncated input
	assert ''.is_oct() == false
	assert '+'.is_oct() == false
	assert '-'.is_oct() == false
	assert '0'.is_oct() == false
	assert '-0'.is_oct() == false
	assert '0o'.is_oct() == false
	assert '-0o'.is_oct() == false
	assert '+0o'.is_oct() == false

	// digits outside of the octal range
	assert '0o8'.is_oct() == false
	assert '0o9'.is_oct() == false
	assert '-0o8'.is_oct() == false
	assert '-0o9'.is_oct() == false
	assert '0o84'.is_oct() == false
	assert '0o96'.is_oct() == false
	assert '-0o83'.is_oct() == false
	assert '-0o2923'.is_oct() == false

	// other number formats and plain text
	assert '0b1'.is_oct() == false
	assert '0b0'.is_oct() == false
	assert '0b'.is_oct() == false
	assert '-0b'.is_oct() == false
	assert '-0b1'.is_oct() == false
	assert '-0b0'.is_oct() == false
	assert '-0b1101'.is_oct() == false
	assert '-0b0101'.is_oct() == false
	assert '-324'.is_oct() == false
	assert '0x1'.is_oct() == false
	assert '0x1A'.is_oct() == false
	assert '-0x1'.is_oct() == false
	assert '-0x1A'.is_oct() == false
	assert '0x'.is_oct() == false
	assert '0xFF'.is_oct() == false
	assert '0xG'.is_oct() == false
	assert '-0xFF'.is_oct() == false
	assert '0.34'.is_oct() == false
	assert 'vlang'.is_oct() == false
}

// test_is_hex checks that `is_hex` accepts only optionally signed `0x`
// literals made of hexadecimal digits, and rejects everything else.
fn test_is_hex() {
	// well-formed hexadecimal literals, with and without a sign
	assert '0x1'.is_hex() == true
	assert '0x1A'.is_hex() == true
	assert '-0x1'.is_hex() == true
	assert '-0x1A'.is_hex() == true
	assert '0xFF'.is_hex() == true
	assert '-0xFF'.is_hex() == true
	assert '0xabcdef'.is_hex() == true
	assert '+0xFA3'.is_hex() == true

	// empty or truncated input
	assert ''.is_hex() == false
	assert '+'.is_hex() == false
	assert '-'.is_hex() == false
	assert '0'.is_hex() == false
	assert '-0'.is_hex() == false
	assert '0x'.is_hex() == false
	assert '-0x'.is_hex() == false

	// characters outside of the hexadecimal range
	assert '0xG'.is_hex() == false

	// other number formats and plain text
	assert '-324'.is_hex() == false
	assert '0b1101'.is_hex() == false
	assert '0.34'.is_hex() == false
	assert '0o23'.is_hex() == false
	assert 'vlang'.is_hex() == false
}

// test_is_int checks that `is_int` accepts only optionally signed runs of
// decimal digits, and rejects everything else.
fn test_is_int() {
	// well-formed decimal integers, with and without a sign
	assert '-324'.is_int() == true
	assert '234'.is_int() == true
	assert '0'.is_int() == true
	assert '-0'.is_int() == true
	assert '+42'.is_int() == true
	assert '-123456789'.is_int() == true
	assert '123456789'.is_int() == true

	// empty input or a sign without digits
	assert ''.is_int() == false
	assert '+'.is_int() == false
	assert '-'.is_int() == false

	// non-digit characters after a valid start
	assert '-b'.is_int() == false
	assert '12a'.is_int() == false
	assert '1-2'.is_int() == false

	// other number formats and plain text
	assert '0x1'.is_int() == false
	assert '0b1101'.is_int() == false
	assert '0.34'.is_int() == false
	assert '0o23'.is_int() == false
	assert 'vlang'.is_int() == false
}

Test output

test

@JalonSolov
Copy link
Contributor

You'll need to format your changes.

@viniciusfdasilva
Copy link
Contributor Author

@JalonSolov Any suggestions on what I should do?

@JalonSolov
Copy link
Contributor

Just run v fmt -w . in your playpen, and commit the reformatted files.

@JalonSolov
Copy link
Contributor

Note that you can install a git hook to check the files when you try to commit them by running cp <v install dir>/cmd/tools/git_pre_commit_hook.vsh <your clone>/.git/hooks/pre-commit

@viniciusfdasilva
Copy link
Contributor Author

@JalonSolov v fmt -w . executed!

@JalonSolov
Copy link
Contributor

Still says string_test.v is not formatted.

@viniciusfdasilva
Copy link
Contributor Author

@JalonSolov Sorry! I had forgotten!

@penguindark
Copy link
Member

my only concern as always is performance,
anyway good job. 👍

@viniciusfdasilva
Copy link
Contributor Author

viniciusfdasilva commented Jan 15, 2024

@penguindark

I do too! In this case, in my view, the implementation meets this requirement by rejecting every other invalid format in the checks before the for loop! =)

If I find a better way, I will submit a new pull request with the improvement! 😀

Copy link
Member

@penguindark penguindark left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After a quick check of the code, I suggest rewriting the functions in a more efficient way.
At first sight I missed the string allocation in the function.
Allocating on the heap just to check a string is really not recommended, IMHO.
Anyway I appreciate a lot the effort the @viniciusfdasilva put in this code 👍
Thanks a lot for your effort to contribute to V.

vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
vlib/builtin/string.v Outdated Show resolved Hide resolved
@@ -425,6 +425,109 @@ fn test_rsplit_once() ? {
assert ext3 == ''
}

fn test_is_bin() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider whether cases like +0b1001 or +0xFA3 must be handled.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! I find the problem! kkkkk

@viniciusfdasilva
Copy link
Contributor Author

@penguindark

I appreciate the recognition for the effort in my collaboration! I'm just starting with the language! I've only had 1 month of exposure to it, and I'm happy to contribute to the language's development! :)

Copy link
Member

@penguindark penguindark left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For me it is ok!
Tnx for your work :)

Comment on lines +1818 to +1823
for i < str.len {
if str[i] < `0` || str[i] > `7` {
return false
}
i++
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More idiomatic (and slightly shorter) V would be

Suggested change
for i < str.len {
if str[i] < `0` || str[i] > `7` {
return false
}
i++
}
for c in str[i..] {
if c < `0` || c > `7` {
return false
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Less performant, I suppose, due to the str[i..] -_-
We can increase speed further using pointers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JalonSolov

for c in str[i..]

I agree with @penguindark. I think direct index access might be more performant by avoiding the cost of generating the slice in my opinion! I suggest sticking to the original implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this were the only change, true. However, you could move the entire function inside for c in str and avoid using the index at all.

However, that can be done later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JalonSolov and @penguindark

Of course! Thank you for the suggestion! I request approval for this PR! I plan to work on some other similar functions as well!

Let's analyze later which aspects can lead one approach to be better than another! :)

I appreciate both of you for the suggestions! 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with str[i..] is that it generates a copy of the data;
in the current state of the compiler that is very inefficient.

I think we can proceed with the merge of this PR.

Copy link
Member

@penguindark penguindark left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For me it is mergeable.

// Check if a string is an octal value. Returns 'true' if it is, or 'false' if it is not
@[direct_array_access]
pub fn (str string) is_oct() bool {
mut i := 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mut i := 0
if str.len == 0 {
return false
}
mut i := 0

Copy link
Member

@spytheman spytheman Jan 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise str[i] on the next line will read the 0th byte that V requires for all V strings, for compatibility with C, which is ok, but not ideal (there are some situations where V strings may be generated inside unsafe{} blocks, for internal use, that will not have that 0th byte at the end, and it will be hard to track which string methods need modification for that case, and which do not).

// Check if a string is an binary value. Returns 'true' if it is, or 'false' if it is not
@[direct_array_access]
pub fn (str string) is_bin() bool {
mut i := 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, if str.len == 0 { return false } should be here as well.

// Check if a string is an hexadecimal value. Returns 'true' if it is, or 'false' if it is not
@[direct_array_access]
pub fn (str string) is_hex() bool {
mut i := 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add if str.len == 0 { return false } at the start.

// Check if a string is an integer value. Returns 'true' if it is, or 'false' if it is not
@[direct_array_access]
pub fn (str string) is_int() bool {
mut i := 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add if str.len == 0 { return false } at the start.

@@ -425,6 +425,168 @@ fn test_rsplit_once() ? {
assert ext3 == ''
}

fn test_is_bin() {
assert '0b1'.is_bin() == true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also assert on the ''.is_bin() etc cases too.

Copy link
Member

@spytheman spytheman left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good work.

@spytheman spytheman merged commit afd74ad into vlang:master Jan 16, 2024
54 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants