#include <CharsetDetection.h>
|
static std::string | GetBomEncoding (const char *const content, const size_t contentLength) |
|
static std::string | GetBomEncoding (const std::string &content) |
|
static bool | DetectXmlEncoding (const std::string &xmlContent, std::string &detectedEncoding) |
|
static bool | DetectXmlEncoding (const char *const xmlContent, const size_t contentLength, std::string &detectedEncoding) |
|
static bool | ConvertHtmlToUtf8 (const std::string &htmlContent, std::string &converted, const std::string &serverReportedCharset="") |
|
static bool | ConvertHtmlToUtf8 (const std::string &htmlContent, std::string &converted, const std::string &serverReportedCharset, std::string &usedHtmlCharset) |
|
static bool | ConvertPlainTextToUtf8 (const std::string &textContent, std::string &converted, const std::string &serverReportedCharset, std::string &usedCharset) |
|
◆ ConvertHtmlToUtf8() [1/2]
bool CCharsetDetection::ConvertHtmlToUtf8 |
( |
const std::string & |
htmlContent, |
|
|
std::string & |
converted, |
|
|
const std::string & |
serverReportedCharset, |
|
|
std::string & |
usedHtmlCharset |
|
) |
| |
|
static |
Detect HTML charset and convert HTML to UTF-8
- Parameters
-
htmlContent | content of HTML file |
converted | receives the result of conversion |
serverReportedCharset | charset from HTTP header or from other out-of-band source, empty if unknown or unset |
usedHtmlCharset | receives the charset used for conversion |
- Returns
- true if charset is properly detected and HTML is correctly converted, false if charset is only guessed
◆ ConvertHtmlToUtf8() [2/2]
static bool CCharsetDetection::ConvertHtmlToUtf8 |
( |
const std::string & |
htmlContent, |
|
|
std::string & |
converted, |
|
|
const std::string & |
serverReportedCharset = "" |
|
) |
| |
|
inline static
Detect HTML charset and convert HTML to UTF-8
- Parameters
-
htmlContent | content of HTML file |
converted | receives the result of conversion |
serverReportedCharset | charset from HTTP header or from other out-of-band source, empty if unknown or unset |
- Returns
- true if charset is properly detected and HTML is correctly converted, false if charset is only guessed
◆ ConvertPlainTextToUtf8()
bool CCharsetDetection::ConvertPlainTextToUtf8 |
( |
const std::string & |
textContent, |
|
|
std::string & |
converted, |
|
|
const std::string & |
serverReportedCharset, |
|
|
std::string & |
usedCharset |
|
) |
| |
|
static |
Try to convert plain text to UTF-8 using the most suitable charset
- Parameters
-
textContent | text to convert |
converted | receives the result of conversion |
serverReportedCharset | charset from HTTP header or from other out-of-band source, empty if unknown or unset |
usedCharset | receives the charset used for conversion |
- Returns
- true if converted without errors, false otherwise
◆ DetectXmlEncoding() [1/2]
bool CCharsetDetection::DetectXmlEncoding |
( |
const char *const |
xmlContent, |
|
|
const size_t |
contentLength, |
|
|
std::string & |
detectedEncoding |
|
) |
| |
|
static |
◆ DetectXmlEncoding() [2/2]
static bool CCharsetDetection::DetectXmlEncoding |
( |
const std::string & |
xmlContent, |
|
|
std::string & |
detectedEncoding |
|
) |
| |
|
inline static
◆ GetBomEncoding() [1/2]
std::string CCharsetDetection::GetBomEncoding |
( |
const char *const |
content, |
|
|
const size_t |
contentLength |
|
) |
| |
|
static |
Detect text encoding by Byte Order Mark. Multibyte encodings (UTF-16/32) always end with explicit endianness (LE/BE).
- Parameters
-
content | pointer to text to analyze |
contentLength | length of text |
- Returns
- detected encoding or empty string if BOM not detected
◆ GetBomEncoding() [2/2]
static std::string CCharsetDetection::GetBomEncoding |
( |
const std::string & |
content | ) |
|
|
inline static
Detect text encoding by Byte Order Mark. Multibyte encodings (UTF-16/32) always end with explicit endianness (LE/BE).
- Parameters
-
content | the text to analyze |
- Returns
- detected encoding or empty string if BOM not detected
The documentation for this class was generated from the following files: