How can I detect a UTF-8 file that has no BOM?

I need to open UTF-8 files in a text editor, but I don't know how to detect UTF-8 when the file has no BOM (byte order mark). The Win32 IsTextUnicode function doesn't have a UTF-8 flag.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Heuristically decides whether a buffer looks like UTF-8 text, so that files
// without a BOM can still be recognised.
//
// This is a structural check only: each lead byte must be followed by the
// number of continuation bytes (bit pattern 10xxxxxx) that it announces.
// Overlong encodings, surrogate code points and values above U+10FFFF are
// NOT rejected.
//
// pBuffer: first byte to inspect; may be null when there is nothing to check.
// size:    number of bytes available at pBuffer; values <= 0 mean "empty".
//
// Returns false as soon as a malformed sequence is found, true otherwise.
// An empty buffer and pure ASCII both return true. A multi-byte sequence that
// is cut off by the end of the buffer is tolerated (its remaining bytes are
// not examined), so the function can be used on a partial read of a file.
bool IsUTF8(const void* pBuffer, long size)
{
    // Nothing to scan; also avoids pointer arithmetic on a null pointer.
    if (!pBuffer || size <= 0)
    {
        return true;
    }

    const unsigned char* cur = (const unsigned char*)pBuffer;
    const unsigned char* end = cur + size;

    while (cur < end)
    {
        long trail; // continuation bytes announced by the lead byte

        if (*cur < 0x80) // 0xxxxxxx: single-byte (ASCII)
        {
            trail = 0;
        }
        else if (*cur < 0xC0) // 10xxxxxx: continuation byte without a lead
        {
            return false;
        }
        else if (*cur < 0xE0) // 110xxxxx: lead of a 2-byte sequence
        {
            trail = 1;
        }
        else if (*cur < 0xF0) // 1110xxxx: lead of a 3-byte sequence
        {
            trail = 2;
        }
        else if (*cur < 0xF8) // 11110xxx: lead of a 4-byte sequence
        {
            trail = 3;
        }
        else // 11111xxx never occurs in UTF-8
        {
            return false;
        }

        // The sequence runs past the end of the buffer: stop scanning and
        // accept what has been seen so far instead of reading out of bounds.
        if (end - cur <= trail)
        {
            break;
        }

        for (long i = 1; i <= trail; i++)
        {
            if ((cur[i] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        cur += trail + 1;
    }

    return true;
}
Registered users can post here. Sign in or register to post.