Encoding trouble with HttpWebResponse

CharacterSet is “ISO-8859-1” by default, if it is not specified in server’s content type header (different from “charset” meta tag in HTML).
I compare HttpWebResponse.CharacterSet with charset attribute of HTML. If they are different – I use the charset as specified in HTML to re-read the page again, but with correct encoding this time.

See the code:

    string strWebPage = "";
    // create request
    System.Net.WebRequest objRequest = System.Net.HttpWebRequest.Create(sURL);
    // get response
    System.Net.HttpWebResponse objResponse;
    objResponse = (System.Net.HttpWebResponse)objRequest.GetResponse();
    // get correct charset and encoding from the server's header
    string Charset = objResponse.CharacterSet;
    Encoding encoding = Encoding.GetEncoding(Charset);
    // read response
    using (StreamReader sr = 
           new StreamReader(objResponse.GetResponseStream(), encoding))
    {
        strWebPage = sr.ReadToEnd();
        // Close and clean up the StreamReader
        sr.Close();
    }

    // Check real charset meta-tag in HTML
    int CharsetStart = strWebPage.IndexOf("charset=");
    if (CharsetStart > 0)
    {
        CharsetStart += 8;
        int CharsetEnd = strWebPage.IndexOfAny(new[] { ' ', '\"', ';' }, CharsetStart);
        string RealCharset = 
               strWebPage.Substring(CharsetStart, CharsetEnd - CharsetStart);

        // real charset meta-tag in HTML differs from supplied server header???
        if(RealCharset!=Charset)
        {
            // get correct encoding
            Encoding CorrectEncoding = Encoding.GetEncoding(RealCharset);

            // read the web page again, but with correct encoding this time
            //   create request
            System.Net.WebRequest objRequest2 = System.Net.HttpWebRequest.Create(sURL);
            //   get response
            System.Net.HttpWebResponse objResponse2;
            objResponse2 = (System.Net.HttpWebResponse)objRequest2.GetResponse();
            //   read response
            using (StreamReader sr = 
              new StreamReader(objResponse2.GetResponseStream(), CorrectEncoding))
            {
                strWebPage = sr.ReadToEnd();
                // Close and clean up the StreamReader
                sr.Close();
            }
        }
    }

Leave a Comment

tech