/* ************************************************************************************
 項目是分析各視頻的真實地址 生成一個dll, 供其它項目使用, 項目中使用了zlib, boost, 要另下載
 如果是sohu網站則自己分析地址. 如果是其它視頻網站才從flvcd上獲取結果.
 項目中使用了:
 1.gzip解壓.
 2.UTF8與GB2312轉碼
 3.boost正則表達式 boost查找單個匹配, 查找所有匹配
 4.sohu視頻地址是分了四類視頻分析的. 可以用fiddler查找功能查找到所想要的幾個字符串
 5.文件獲取是使用的MFC中的CHttpFile獲取的, 嘗試用了WinINet和WinHTTP ms的api訪問網絡的都不怎么行. chrome瀏覽器第一個版本是用winhttp訪問網絡的. 也試過socket訪問網絡 但要跳轉什么的太繁了
 6.函數導出, 可以用def文件. 也可以用dllexport
 7.多線程CreateThread
 注:
 網絡訪問花了相當大的時間
 正則表達式boost中的perl正則表達式. "要寫成\" \要寫成\\, 要多用查找替換. 匹配多個結果時要迭代搜索查詢
 網絡給的數據是壓縮的gzip問題也花了好長時間.
 utf-8與gb2312轉換也花了好長時間. buff最后一次讀取時, 字符串沒法控制. 內存初始化是設置成0就行了
 函數導出研究了兩種方法, 花了很長時間.
 多線程沒花多長時間
 ************************************************************************************ */
// Analyzer.cpp
vector<string> Analyzer::GetPropertyInIntegratedBrackets(string strPropertyName, string strJson) { vector<string> vect; regex regclipsURL("(?<=(" + strPropertyName + "\":\\[))[^]]+?(?=(]))"); boost::smatch what; string strclipsURL = ""; //轉(zhuǎn)成另一個(gè)變量再傳,不然出錯(cuò) 強(qiáng)轉(zhuǎn)是強(qiáng)的指針,以前是結(jié)構(gòu)類型,強(qiáng)指針沒用 if(regex_search(strJson, what, regclipsURL)) { strclipsURL = what[0]; } int iIndex = 0; while (iIndex >= 0) { iIndex = strclipsURL.find(','); if(iIndex > 0) { vect.push_back(strclipsURL.substr(1, iIndex - 2));//去了兩邊的雙引號(hào) strclipsURL = strclipsURL.substr(iIndex + 1); } else vect.push_back(strclipsURL.substr(1,strlen(strclipsURL.c_str()) - 2)); //去了兩邊的雙引號(hào) } return vect; } int Analyzer::httpgzdecompress(Byte *zdata, uLong nzdata, Byte *data, uLong *ndata) { int err = 0; z_stream d_stream = {0}; /* decompression stream */ static char dummy_head[2] = { 0x8 + 0x7 * 0x10, (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, }; d_stream.zalloc = (alloc_func)0; d_stream.zfree = (free_func)0; d_stream.opaque = (voidpf)0; d_stream.next_in = zdata; d_stream.avail_in = 0; d_stream.next_out = data; if(inflateInit2(&d_stream, 47) != Z_OK) return -1; while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) { d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break; if(err != Z_OK ) { if(err == Z_DATA_ERROR) { d_stream.next_in = (Bytef*) dummy_head; d_stream.avail_in = sizeof(dummy_head); if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK) { return -1; } } else return -1; } } if(inflateEnd(&d_stream) != Z_OK) return -1; *ndata = d_stream.total_out; return 0; } //ms-help://MS.VSCC.v90/MS.MSDNQTR.v90.chs/intl/unicode_81rn.htm //將UTF8字符串轉(zhuǎn)換為gb2312 CString Analyzer::ConvertUTF8toGB2312(const char *pData, size_t size) { size_t n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, NULL, 0); WCHAR * pChar = new WCHAR[n+1]; n = MultiByteToWideChar(CP_UTF8, 0, pData, 
(int)size, pChar, n); pChar[n]=0; n = WideCharToMultiByte(936, 0, pChar, -1, 0, 0, 0, 0); char *p = new char[n+1]; n = WideCharToMultiByte(936, 0, pChar, -1, p, (int)n, 0, 0); CString result(p); delete []pChar; delete []p; return result; } CString Analyzer::GetPageHtml(CString strUrl) { CString strHtml = "";//獲取HTML try { strUrl = strUrl.Trim(); CInternetSession session("HttpClient"); session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 5000); // 5秒的連接超時(shí) session.SetOption(INTERNET_OPTION_SEND_TIMEOUT, 1000); // 1秒的發(fā)送超時(shí) session.SetOption(INTERNET_OPTION_RECEIVE_TIMEOUT, 7000); // 7秒的接收超時(shí) session.SetOption(INTERNET_OPTION_DATA_SEND_TIMEOUT, 1000); // 1秒的發(fā)送超時(shí) session.SetOption(INTERNET_OPTION_DATA_RECEIVE_TIMEOUT, 7000); // 7秒的接收超時(shí) session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 1); // 1次重試 CHttpFile* pFile = (CHttpFile*)session.OpenURL((LPCTSTR)strUrl, 1, INTERNET_FLAG_RELOAD | INTERNET_FLAG_TRANSFER_BINARY); DWORD dwStatusCode; pFile-> QueryInfoStatusCode(dwStatusCode); if(dwStatusCode == HTTP_STATUS_OK) { CString strLength = ""; CString strHeaders = ""; pFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH, strLength); pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strHeaders); long lLength = 4096 * 500; byte* pbHtml = new byte[lLength]; //在堆上動(dòng)態(tài)分配內(nèi)存 memset(pbHtml, 0, lLength); //初始化 byte sRecived[512]; int iIndex = 0; int num = 0; while((num = pFile->Read(sRecived,512)) > 0 ) { memcpy(pbHtml+iIndex, sRecived, num); iIndex+=num; } pbHtml[iIndex] = NULL; if(strHeaders.Find("gzip") > -1) { uLong ulLength = 4096 * 500; byte* pbData = new byte[ulLength]; memset(pbData,0,ulLength); httpgzdecompress(pbHtml, lLength, pbData, &ulLength); pbData[ulLength] = NULL; strHtml = (CHAR*)pbData; delete pbData; } else { strHtml = (CHAR*)pbHtml; if(strHeaders.MakeLower().Find("utf-8") > - 1 || strHtml.MakeLower().Find("utf-8") > -1)//strHtml變成小寫了 { strHtml = ConvertUTF8toGB2312((CHAR*)pbHtml,strlen((CHAR*)pbHtml));//編碼轉(zhuǎn)換 } 
else//重新得到大小寫區(qū)分的 { strHtml = (CHAR*)pbHtml; } } delete pbHtml; } pFile -> Close(); delete pFile; session.Close(); return strHtml; } catch (CException* e) { (void)e; this->m_State = Analyzer_State_NetError; return ""; } }
// 更多文章、技術交流、商務合作、聯系博主
// 微信掃碼或搜索:z360901061
//
// 微信掃一掃加我為好友
// QQ號聯系: 360901061
// 您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
// 【本文對您有幫助就好】元
