Home > Software engineering >  A Hebrew characters of several bytes
How many bytes does a Hebrew character occupy?

Time:09-24


 // Poster's test snippet: prints one byte as a character, then the length and
 // the first seven bytes (in hex) of a one-character string literal.
 // NOTE(review): the literal below was presumably a Hebrew letter in the
 // poster's own source file; it appears here as "?" -- confirm against the post.
 char a[20] = "?";
 char sn = 0xe4;  // byte value the poster expects to display as a Hebrew letter
 printf("%c\n", sn);
 int len = strlen(a);
 printf("%d\n", len);
 // NOTE(review): the separators between the %x conversions were lost in the
 // garbled text; spaces are assumed here.
 printf("%x %x %x %x %x %x %x", a[0], a[1], a[2], a[3], a[4], a[5], a[6]);



Earlier, when I printed this from an MFC program, the character took two bytes. Two days later I printed it from a console program and it took one byte; when I ran the MFC program again, it was also one byte. My questions are:
1. Is the character single-byte or double-byte?
2. Why is the output character "?" rather than the Hebrew character corresponding to 0xe4? 0xe4 is a Hebrew character in the Hebrew encoding. I also tried other Hebrew character values, and they are still printed as "?".
3. The array a holds a Hebrew character, so why does %x print 3f? I tried other Hebrew characters and they also give 3f. Why is this?

CodePudding user response:

What is the difference between UNICODE and characters?
It is recommended to use Unicode types such as WCHAR and LPCWSTR.
https://unicode-table.com/cn/blocks/hebrew/

CodePudding user response:

Unicode text is commonly encoded as UCS-2/UTF-16, in which a single character takes two bytes.

CodePudding user response:

#pragma comment(lib, "user32")
#pragma comment(lib, "gdi32")
#include <conio.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
Extern "C" HWND WINAPI GetConsoleWindow ();
Void HideTheCursor () {
CONSOLE_CURSOR_INFO cciCursor;
HANDLE hStdOut=GetStdHandle (STD_OUTPUT_HANDLE);

If (GetConsoleCursorInfo (hStdOut, & amp; CciCursor)) {
CciCursor. BVisible=FALSE;
SetConsoleCursorInfo (hStdOut, & amp; CciCursor);
}
}
Void ShowTheCursor () {
CONSOLE_CURSOR_INFO cciCursor;
HANDLE hStdOut=GetStdHandle (STD_OUTPUT_HANDLE);

If (GetConsoleCursorInfo (hStdOut, & amp; CciCursor)) {
CciCursor. BVisible=TRUE;
SetConsoleCursorInfo (hStdOut, & amp; CciCursor);
}
}
Int main () {
The HWND HWND;
HDC HDC.
HFONT HFONT;
Would the wc [2];

System (" color F0 ");
system("cls");
HideTheCursor ();
HWND=GetConsoleWindow ();
HDC=GetDC (HWND);
Hfont=CreateFont (48,0,0,0,0,0,0,0, GB2312_CHARSET, 0,0,0,0, "song typeface - founder large character set");
SelectObject (HDC, hfont);
Wc [0]=0 xd854u;
Wc [1]=0 xdc00u;
TextOutW (HDC, 10, 10, wc, 2);
DeleteObject (hfont);
ReleaseDC (HWND, HDC);
Getch ();
07 system (" color ");
system("cls");
ShowTheCursor ();
return 0;
}
# if 0
A surrogate, or surrogate pair, is a pair of 16-bit Unicode code values that together represent a single character. The key point to remember is this:
a surrogate pair is really a single 32-bit character, so you can no longer assume that one 16-bit Unicode value maps to exactly one character.

Using surrogate pairs
The first value of a surrogate pair is the high surrogate, a 16-bit code value in the range U+D800 to U+DBFF.
The second value of the pair is the low surrogate, a value in the range U+DC00 to U+DFFF. By using surrogate pairs,
a 16-bit Unicode system can address more than one million additional characters (2^20) defined by the Unicode standard.

Surrogate characters can be used in any string passed to the XmlTextWriter methods. However, the surrogate characters
should be valid in the XML being written. For example, the World Wide Web Consortium (W3C) recommendation does not allow surrogate characters in the names of elements or attributes.
If the string contains an invalid surrogate pair, an exception is thrown.

In addition, you can use WriteSurrogateCharEntity to write the character entity that corresponds to a surrogate pair. The character entity is written
in hexadecimal format and is generated with the following formula:

(highChar - 0xD800) * 0x400 + (lowChar - 0xDC00) + 0x10000

If the string contains an invalid surrogate pair, an exception is thrown. The following example shows the WriteSurrogateCharEntity method with a surrogate pair as input.

C # copy
// The following line writes &#x10000.
// Arguments are passed low surrogate first, then high surrogate.
WriteSurrogateCharEntity('\uDC00', '\uD800');
The following example generates a file containing surrogate pairs, loads it into an XmlReader, and saves it under a new file name.
The original file and the new file are then loaded back into the application as XML Document Object Model (DOM) structures so that they can be compared.

C # copy
Char lowChar highChar;
Char [] charArray=new char [10].
FileStream targetFile=new FileStream (" SurrogatePair. XML, "
FileMode. Create, FileAccess ReadWrite, FileShare. ReadWrite);

LowChar=the Convert. ToChar (0 xdc00);
HighChar=the Convert. ToChar (0 xd800);
XmlTextWriter tw=new XmlTextWriter (targetFile, null);
Tw. Formatting=Formatting. Indented;
Tw. WriteStartElement (" root ");
Tw. WriteStartAttribute (" test ", null);
Tw. WriteSurrogateCharEntity (lowChar highChar);
LowChar=the Convert. ToChar (0 xdc01);
HighChar=the Convert. ToChar (0 xd801);
Tw. WriteSurrogateCharEntity (lowChar highChar);
LowChar=the Convert. ToChar (0 XDFFF);
HighChar=the Convert. ToChar (0 XDBFF);
Tw. WriteSurrogateCharEntity (lowChar highChar);

//Add 10 random surrogate pairs.
//As Unicode, the high bytes are in the lower
//the memory; For example, the word 6 a21 as 21 6 a.
//The high or low, is in The logical sense.
The Random Random=new Random ();
for (int i=0; I & lt; 10; + + I) {
LowChar=the Convert. ToChar (random. Next (xe000 xdc00 0, 0));
HighChar=the Convert. ToChar (random. Next (xdc00 xd800 0, 0));
CharArray [I]=highChar;
CharArray [+ + I]=lowChar;
}
Tw. WriteChars (charArray, 0, charArray. Length);

for (int i=0; I & lt; 10; + + I) {
LowChar=the Convert. ToChar (random. Next (xe000 xdc00 0, 0));
HighChar=the Convert. ToChar (random. Next (xdc00 xd800 0, 0));
Tw. WriteSurrogateCharEntity (lowChar highChar);
}

Tw. WriteEndAttribute ();
Tw. WriteEndElement ();
Tw. Flush ();
Tw. Close ();

XmlTextReader r=new XmlTextReader (" SurrogatePair. XML ");

R.R ead ();
R.M oveToFirstAttribute ();
TargetFile=new FileStream (" SurrogatePairFromReader. XML, "
nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
  • Related