C# .NET Console Application to Retrieve Framed and JavaScript-Protected HTML Pages and Convert Them to .XPS

I recently had the opportunity to solve a highly repetitive keyboarding process with a quick and dirty console application.

This application finds saved web pages in a known folder, reads each page's user ID, and requests that user's training certificate from the original website. The certificate page uses JavaScript to disable right-clicking, so it cannot simply be saved from the browser; instead, the application saves the response as a .doc and then converts it to .xps.

Some research into the original website's structure was necessary to complete this task.

It’s not pretty, but it works and I didn’t have to get into the programmatic complexity of Microsoft’s XML Paper Specification.

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Net;
using System.Windows.Xps;
using System.Windows.Xps.Packaging;
using System.Windows.Xps.Serialization;
using System.Xml;
using Microsoft.Office.Interop.Word;

namespace HTTPWebResponseToXPS
{
    /// <summary>
    /// Console utility that scans a folder of saved .htm pages, pulls the user ID
    /// out of each page, downloads that user's training certificate page from the
    /// web site, writes it to a temporary Word-readable .doc file and has Word
    /// export it as .xps into an "XPS" sub-folder.
    /// </summary>
    class Program
    {
        // Folder holding the saved *.htm pages; the temporary .doc files and the
        // XPS output sub-folder are created underneath it.
        private const string CertificateFolder = @"C:\Users\Chris\Desktop\CertificateFolder\";

        // Root of the target site and of its certificate area. Relative src=/href=
        // references in the downloaded page are rewritten against these.
        private const string SiteRoot = @"https://TargeWebSite/";
        private const string CertificateRoot = SiteRoot + @"TrainingCertificates/";

        // Text that precedes the login name inside a saved page (quotes already stripped).
        private const string UserIdMarker = @"strUserID=";

        // Number of characters taken after the marker as the login name.
        private const int LoginNameLength = 8;

        // How often deleting the temporary .doc is attempted before giving up.
        private const int DeleteAttempts = 5;

        /// <summary>
        /// Processes every *.htm file in <see cref="CertificateFolder"/>: for each line
        /// that contains the user ID marker the certificate is fetched and converted,
        /// then the temporary .doc for that file is removed. Waits for a key press
        /// before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DirectoryInfo di = new DirectoryInfo(CertificateFolder);
            foreach (FileInfo fi in di.GetFiles("*.htm"))
            {
                // Strip only the real extension. Replace(".htm", "") also removed ".htm"
                // from the middle of a name and left a trailing "l" for *.html files,
                // which the "*.htm" pattern can also match on Windows.
                string userName = System.IO.Path.GetFileNameWithoutExtension(fi.Name);

                using (StreamReader sr = new StreamReader(fi.FullName))
                {
                    string rawLine;
                    while ((rawLine = sr.ReadLine()) != null)
                    {
                        string loginName = extractLoginName(rawLine);
                        if (loginName != null)
                        {
                            getWebPage(loginName, userName);
                        }
                    }
                }

                deleteWordDoc(System.IO.Path.Combine(CertificateFolder, userName + ".doc"));
            }
            Console.Read();
        }

        /// <summary>
        /// Returns the <see cref="LoginNameLength"/> characters that follow
        /// <see cref="UserIdMarker"/> in <paramref name="rawLine"/> (after removing all
        /// double quotes), or null when the marker is absent or the line is too short.
        /// </summary>
        private static string extractLoginName(string rawLine)
        {
            string lineOfText = rawLine.Replace("\"", string.Empty);
            int markerPos = lineOfText.IndexOf(UserIdMarker, StringComparison.Ordinal);
            if (markerPos == -1)
            {
                return null;
            }

            int valueStart = markerPos + UserIdMarker.Length;
            if (valueStart + LoginNameLength > lineOfText.Length)
            {
                // The original Substring call threw ArgumentOutOfRangeException here.
                Console.Error.WriteLine("Skipping truncated user ID in line: " + lineOfText);
                return null;
            }

            return lineOfText.Substring(valueStart, LoginNameLength);
        }

        /// <summary>
        /// Deletes the file at <paramref name="p"/>, retrying a few times when the
        /// file is still in use.
        /// </summary>
        /// <param name="p">Full path of the temporary .doc file.</param>
        /// <exception cref="IOException">The file is still locked after all attempts.</exception>
        static void deleteWordDoc(string p)
        {
            FileInfo file = new FileInfo(p);
            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    // Does nothing when the file does not exist.
                    file.Delete();
                    return;
                }
                catch (IOException)
                {
                    if (attempt >= DeleteAttempts)
                    {
                        throw;
                    }
                }

                // Kept from the original retry path: presumably the collection lets
                // leftover Word COM wrappers be finalized so Word releases the file.
                GC.Collect();
                GC.WaitForPendingFinalizers();
                System.Threading.Thread.Sleep(500);
            }
        }

        /// <summary>
        /// Downloads the certificate page for <paramref name="loginName"/> using the
        /// current Windows credentials and hands the HTML to
        /// <see cref="createWORDdocument"/>.
        /// </summary>
        /// <param name="loginName">Login taken from the saved page; URL-escaped before use.</param>
        /// <param name="userName">Base name used for the generated .doc and .xps files.</param>
        static void getWebPage(string loginName, string userName)
        {
            // Escape the login so characters such as '&' or ' ' cannot alter the query string.
            string url = CertificateRoot
                + @"CrypticGetCertificatePage.asp?certificate_ident=12345&training_ident=abcde&login="
                + Uri.EscapeDataString(loginName);

            WebRequest request = WebRequest.Create(url);
            request.Credentials = CredentialCache.DefaultCredentials;

            // using blocks guarantee the connection is released even if reading fails.
            string responseFromServer;
            using (WebResponse response = request.GetResponse())
            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream))
            {
                responseFromServer = reader.ReadToEnd();
            }

            createWORDdocument(userName, responseFromServer);
        }

        /// <summary>
        /// Wraps the downloaded HTML in a Word-flavoured HTML header, writes it to
        /// "&lt;userName&gt;.doc" in <see cref="CertificateFolder"/> and converts that file
        /// to "XPS\&lt;userName&gt;.xps".
        /// </summary>
        /// <param name="userName">Base name for the generated files.</param>
        /// <param name="responseFromServer">HTML of the certificate page.</param>
        private static void createWORDdocument(string userName, string responseFromServer)
        {
            StringBuilder strBuilder = new StringBuilder();
            // A space was missing between the xmlns:w and xmlns attributes.
            strBuilder.AppendLine(@"<html xmlns:o='urn:schemas-microsoft-com:office:office' xmlns:w='urn:schemas-microsoft-com:office:word' xmlns='http://www.w3.org/TR/REC-html40'> <head><title>Time</title>");
            strBuilder.AppendLine(@"<style> <!-- /* Style Definitions */ @page Section1    {size:11.0in 8.5in;     margin:.25in .25in .25in .25in ;     mso-header-margin:.5in;    mso-footer-margin:.5in; mso-paper-source:0;} div.Section1   {page:Section1;} --> </style></head>");
            // Quotes are stripped first so that src=/href= values can be prefixed
            // with the site address by plain text replacement.
            strBuilder.Append(responseFromServer
                .Replace("\"", string.Empty)
                .Replace("src=", "src=" + CertificateRoot)
                .Replace("href=", "href=" + SiteRoot));

            string wordDocName = System.IO.Path.Combine(CertificateFolder, userName + ".doc");
            string xpsFolder = System.IO.Path.Combine(CertificateFolder, "XPS");
            string xpsDocName = System.IO.Path.Combine(xpsFolder, userName + ".xps");

            // The export fails if the output folder is missing; this is a no-op when it exists.
            System.IO.Directory.CreateDirectory(xpsFolder);

            // StreamWriter creates or overwrites the file itself, so no File.Create is needed.
            using (StreamWriter sWriter = new StreamWriter(wordDocName))
            {
                sWriter.Write(strBuilder.ToString());
            }

            // The returned package keeps the .xps file open; close it since it is not used here.
            XpsDocument xpsDoc = ConvertWordDocToXPSDoc(wordDocName, xpsDocName);
            if (xpsDoc != null)
            {
                xpsDoc.Close();
            }
        }

        /// <summary>
        /// Opens <paramref name="wordDocName"/> in Word, switches it to landscape with
        /// narrow margins and saves it as XPS to <paramref name="xpsDocName"/>.
        /// </summary>
        /// <param name="wordDocName">Full path of the source document.</param>
        /// <param name="xpsDocName">Full path of the XPS file to create.</param>
        /// <returns>
        /// The created XPS file opened for reading (the caller must close it), or null
        /// when the conversion failed; the failure is reported on standard error.
        /// </returns>
        private static XpsDocument ConvertWordDocToXPSDoc(string wordDocName, string xpsDocName)
        {
            Microsoft.Office.Interop.Word.Application wordApplication = null;
            bool saved = false;
            try
            {
                wordApplication = new Microsoft.Office.Interop.Word.Application();
                Document doc = wordApplication.Documents.Open(wordDocName);
                // Pause kept from the original; presumably gives Word time to finish
                // loading linked content -- TODO confirm whether it is still needed.
                System.Threading.Thread.Sleep(500);

                doc.PageSetup.Orientation = WdOrientation.wdOrientLandscape;
                // NOTE(review): Word's PageSetup margins are expressed in points, so these
                // are 0.25pt, not the 0.25in used in the HTML style block. Left unchanged
                // to keep the rendered layout as it was -- confirm the intended unit.
                doc.PageSetup.LeftMargin = (float).25;
                doc.PageSetup.RightMargin = (float).25;
                doc.PageSetup.BottomMargin = (float).25;
                doc.SaveAs(xpsDocName, WdSaveFormat.wdFormatXPS);
                saved = true;
            }
            catch (Exception exp)
            {
                Console.Error.WriteLine("Could not convert '" + wordDocName + "' to XPS: " + exp.Message);
            }
            finally
            {
                // Always shut Word down, also after a failure; otherwise a hidden
                // WINWORD.EXE keeps running and keeps the .doc locked. Quitting without
                // saving replaces the original Undo() calls, which presumably existed to
                // avoid a save prompt.
                if (wordApplication != null)
                {
                    try
                    {
                        object doNotSave = WdSaveOptions.wdDoNotSaveChanges;
                        ((_Application)wordApplication).Quit(ref doNotSave);
                    }
                    catch (Exception quitExp)
                    {
                        Console.Error.WriteLine("Could not close Word: " + quitExp.Message);
                    }
                }
            }

            if (!saved)
            {
                return null;
            }

            try
            {
                return new XpsDocument(xpsDocName, System.IO.FileAccess.Read);
            }
            catch (Exception exp)
            {
                Console.Error.WriteLine("Could not open '" + xpsDocName + "': " + exp.Message);
                return null;
            }
        }
    }
}

CodeProject

Advertisements