\n\u5c0f\u7f16\u524d\u4e9b\u65e5\u5b50\u5199\u4e86\u4e00\u7bc7\u5173\u4e8eIP\u4ee3\u7406\u6a21\u5757\u7684\u535a\u5ba2(Python\u722c\u866b:\u5236\u4f5c\u4e00\u4e2a\u5c5e\u4e8e\u81ea\u5df1\u7684IP\u4ee3\u7406\u6a21\u5757)\uff0c\u4f46\u662f\u90a3\u4e2a\u8fd8\u9700\u8981\u6539\u8fdb\uff0c\u4eca\u5929\u5c0f\u7f16\u6539\u8fdb\u4e86\u4e00\u4e0b\u90a3\u4e2a\u6a21\u5757\uff0c\u722c\u53d6\u591a\u4e2a\u7f51\u5740\u7684ip\u6570\u636e\uff0c\u7136\u540e\u53bb\u91cd\uff0c\u6700\u540e\u5224\u65ad\u722c\u53d6\u7684ip\u662f\u5426\u53ef\u7528\u3002\u8ba9\u6211\u4eec\u6765\u770b\u770b\u524d\u9762\u7684\u90a3\u4e2a\u6a21\u5757\u548c\u6539\u8fdb\u7684\u6a21\u5757\u5b83\u4eec\u4e24\u8005\u4e4b\u95f4\u7684\u533a\u522b\u5427\uff01<\/p>\n<\/blockquote>\n
<\/p>\n
1.\u722c\u53d63\u4e2a\u514d\u8d39\u63d0\u4f9bip\u4ee3\u7406\u7684\u7f51\u5740<\/h4>\n
\u8fd9\u4e09\u4e2aip\u4ee3\u7406\u7f51\u5740\u5206\u522b\u4e3a:\u5feb\u4ee3\u7406\u3001\u6ce5\u9a6cip\u4ee3\u7406\u300189\u514d\u8d39\u4ee3\u7406<\/p>\n
1.1 \u722c\u53d6\u5feb\u4ee3\u7406\u7f51\u5740\u7684IP\u6570\u636e<\/h5>\n
\u9996\u5148\uff0c\u8ba9\u6211\u4eec\u6765\u722c\u53d6\u5feb\u4ee3\u7406\u7684IP\u6570\u636e\u5427\uff01
\u5feb\u4ee3\u7406\u7684\u7f51\u5740\u6837\u5f0f\u4e3a:https:\/\/www.kuaidaili.com\/free\/inha\/{\u9875\u7801}\/
\u82b1\u62ec\u53f7\u4ee3\u8868\u9875\u7801\uff0c\u8fd9\u4e2a\u7f51\u5740\u7684\u9875\u7801\u5f88\u591a\uff0c\u67094000\u591a\u9875\u3002
\u8ba9\u6211\u4eec\u6765\u770b\u770b\u600e\u6837\u5f97\u5230\u8fd9\u4e2a\u7f51\u7ad9\u7684ip\u6570\u636e\u5427\uff01\u8fd9\u91cc\u4f7f\u7528xpath\u8bed\u6cd5
\u56e0\u4e3a\u8fd9\u4e2a\u534f\u8bae\u7c7b\u578b\u662f\u5927\u5199\u5b57\u6bcd\uff0c\u5c0f\u7f16\u4e5f\u4e0d\u77e5\u9053\u8fd9\u4e2a\u4f1a\u4e0d\u4f1a\u5bf9\u6700\u7ec8\u7ed3\u679c\u9020\u6210\u5f71\u54cd\uff0c\u4e8e\u662f\u5e72\u8106\u628a\u5b83\u8f6c\u6210\u5c0f\u5199\u5b57\u6bcd\u4e86\u3002
\u4ee3\u7801\u4e3a:
\u56e0\u4e3a\u722c\u53d6\u7684\u662f\u591a\u9875ip\u6570\u636e\uff0c\u6240\u4ee5\u5c0f\u7f16\u5728\u6bcf\u722c\u53d61\u9875\u4e4b\u540e\u4f11\u7720\u4e862\u79d2\u3002<\/p>\n
1.2 \u722c\u53d6\u6ce5\u9a6cip\u4ee3\u7406\u7f51\u5740\u7684IP\u6570\u636e<\/h5>\n
\u8fd9\u4e2a\u7f51\u5740\u7684\u6837\u5f0f\u548c\u5feb\u4ee3\u7406\u4e00\u6837\uff1ahttp:\/\/www.nimadaili.com\/gaoni\/{\u9875\u7801}\/
\u82b1\u62ec\u53f7\u91cc\u9762\u7684\u5185\u5bb9\u662f\u9875\u7801\uff0c\u867d\u7136\u8fd9\u4e2a\u7f51\u5740\u4e0b\u9762\u6807\u7684\u662f\u67092000\u9875
\u4f46\u662f\u771f\u6b63\u6709ip\u6570\u636e\u7684\u9875\u6570\u53ea\u6709300\u591a\u9875\u3002
\u73b0\u5728\u8ba9\u6211\u4eec\u6765\u770b\u770b\u600e\u6837\u5f97\u5230\u8fd9\u4e2a\u7f51\u5740\u7684ip\u6570\u636e\u5427\uff01
\u6709\u7684ip\u53ea\u6709\u4e00\u4e2a\u534f\u8bae\u53ef\u4ee5\u4f7f\u7528\uff08\u5927\u591a\u6570ip\u90fd\u662f\u8fd9\u79cd\u60c5\u51b5\uff09\uff0c\u6709\u7684ip\u4e24\u4e2a\u534f\u8bae\u90fd\u53ef\u4ee5\u4f7f\u7528\uff0c\u8fd9\u91cc\u4e3a\u4e86\u7b80\u5355\uff0c\u5c0f\u7f16\u76f4\u63a5\u9009\u53d61\u4e2aip\u534f\u8bae(\u4e24\u4e2a\u534f\u8bae\u90fd\u53ef\u7528\u7684\uff0c\u9009\u7b2c1\u4e2a)\u3002\u540c\u6837\u8fd9\u91cc\u4e5f\u8981\u8f6c\u6362\u5b57\u7b26\u4e32\u5927\u5c0f\u5199\u60c5\u51b5\u3002
<\/p>\n
1.3 \u722c\u53d689\u514d\u8d39\u4ee3\u7406\u7f51\u5740\u7684IP\u6570\u636e<\/h5>\n
\u8fd9\u4e2a\u7f51\u5740\u6837\u5f0f\u4e3a\uff1ahttps:\/\/www.89ip.cn\/index_{\u9875\u7801}.html
\u82b1\u62ec\u53f7\u4ee3\u8868\u9875\u7801\uff0c\u603b\u5171\u6709\u6548\u7684\u9875\u6570\u4e3a100\u591a\u9875\uff0c\u5177\u4f53\u722c\u53d6\u8fd9\u4e2aip\u6570\u636e\u53c2\u8003\u5982\u4e0b\uff1a
\u56e0\u4e3a\u7f51\u5740\u6ca1\u6709\u63d0\u5230ip\u534f\u8bae\u7c7b\u578b,\u8fd9\u91cc\u76f4\u63a5\u5c06\u4e24\u79cd(http,https)\u90fd\u62fc\u63a5,\u722c\u53d6\u7684\u4fe1\u606f\u524d\u9762\u548c\u540e\u9762\u6709\u6362\u884c\u548c\u7a7a\u683c\uff0c\u6240\u4ee5\u5728\u6700\u7ec8\u62fc\u63a5\u65f6\uff0c\u8fd8\u8981\u8fdb\u884c\u5b57\u7b26\u4e32\u7684\u622a\u53d6\u3002
<\/p>\n
2.\u9a8c\u8bc1\u722c\u53d6\u7684ip\u662f\u5426\u53ef\u7528<\/h4>\n
\u8fd9\u91cc\u4f9d\u65e7\u4f7f\u7528\u767e\u5ea6\u8fd9\u4e2a\u7f51\u5740\u7528\u6765\u68c0\u9a8c\u722c\u53d6\u7684ip\u662f\u5426\u53ef\u7528\uff0c\u4e3a\u4e86\u63d0\u9ad8\u68c0\u9a8c\u6548\u7387\uff0c\u4f7f\u7528\u591a\u7ebf\u7a0b\u3002
\u53c2\u8003\u4ee3\u7801\u5982\u4e0b:<\/p>\n
def checkIps(self, ips):
    """Worker loop: drain ``ips`` and keep the proxies that can reach ``self.url``.

    Intended to be run by several threads against the same list (see
    ``getUserIps``). Each iteration pops one proxy mapping, issues a GET to
    ``self.url`` through it and, on HTTP 200, appends the mapping to
    ``self.userfulProxies``.

    Args:
        ips: mutable list of proxy mappings, each passed as ``proxies=`` to
            ``requests.get``. The list is consumed (emptied) by the workers.

    Returns:
        None. Results are collected in ``self.userfulProxies``.
    """
    while True:
        # Pop first and treat IndexError as "nothing left to test". A separate
        # len() check followed by pop() races with the other workers: the list
        # can be emptied between the two calls and the uncaught IndexError
        # would terminate this thread with a traceback.
        try:
            proxies = ips.pop()
        except IndexError:
            break

        headers = {'user-agent': self.userAgent.getUserAgent()}
        try:
            # Short timeout: a proxy that cannot answer within 0.5 s is
            # reported as unusable via the except branch below.
            rsp = requests.get(url=self.url, headers=headers, proxies=proxies, timeout=0.5)
            if rsp.status_code == 200:
                self.userfulProxies.append(proxies)
                # Progress output ("IP ... usable"); \u escapes keep the emitted text unchanged.
                print('========IP{}\u53ef\u7528'.format(proxies))
            # NOTE(review): the original indentation was lost, so it is unclear
            # whether this pause belonged inside the status check. It is placed
            # after every completed request here; confirm against the author's code.
            time.sleep(1)
        except Exception as e:
            # Any failure (timeout, refused connection, bad proxy entry) only
            # marks this proxy as unusable; the worker carries on.
            print(e)
            print('========IP{}\u4e0d\u53ef\u7528'.format(proxies))


def getUserIps(self):
    """Crawl proxies, validate them with 10 worker threads and return the usable ones.

    Calls ``self.spiderIps()``, then hands a copy of ``self.proxies`` to ten
    ``checkIps`` workers, waits for all of them, and prints a short summary
    (count of usable proxies and the usable percentage).

    Returns:
        ``self.userfulProxies`` - the list the workers appended usable proxy
        mappings to.
    """
    self.spiderIps()
    # Work on a copy so the workers can consume it while self.proxies keeps
    # the full crawl result for the statistics below.
    ips = self.proxies[:]

    threads = []
    for _ in range(10):
        thread = threading.Thread(target=self.checkIps, args=(ips,))
        thread.start()
        threads.append(thread)

    for th in threads:
        th.join()

    usable = len(self.userfulProxies)
    total = len(self.proxies)
    # Guard the ratio: an empty crawl previously raised ZeroDivisionError here.
    efficiency = (usable / total) * 100 if total else 0.0

    print('IP test completed!')
    print('(The number of available IPs is:[%d])' % usable)
    print('IP proxy efficiency is--{:.2f}%'.format(efficiency))

    return self.userfulProxies
<\/p>\n
3.\u6700\u7ec8\u5b9e\u6218:\u5229\u7528ip\u4ee3\u7406\u722c\u53d6\u96c6\u56fe\u7f51\u4e0a500\u9875\u5c0f\u59d0\u59d0\u7684\u56fe\u7247<\/h4>\n
\u4e0a\u4e00\u6b21\u4f7f\u7528\u5f00\u59cb\u7684\u90a3\u4e2aip\u4ee3\u7406\u6a21\u5757\uff0c\u722c\u53d6\u8fd9\u4e2a\u7f51\u5740\u76f4\u63a5\u5168\u90e8\u4e0d\u80fd\u4f7f\u7528\uff0c\u73b0\u5728\u5c0f\u7f16\u60f3\u80af\u5b9a\u53ef\u7528\u3002\u6bd5\u7adf
\u800c
\u5f00\u59cb\u722c\u53d6\uff08\u8fd9\u91cc\u53ea\u6f14\u793a\u8fd9\u4e2aip\u4ee3\u7406\u6a21\u5757\u7684\u53ef\u7528\u6027\uff0c\u53ea\u722c\u53d6\u56fe\u7247\u94fe\u63a5\uff0c\u5e76\u4e0d\u4e0b\u8f7d\u56fe\u7247\uff0c\u4e0d\u8fc7\uff0c\u5982\u679c\u8981\u4e0b\u8f7d\u56fe\u7247\uff0c\u4e0d\u4e5f\u662f1\u884c\u4ee3\u7801\u7684\u4e8b\u5417\uff1f\u54c8\u54c8\uff09
\u53c2\u8003\u4ee3\u7801\u5982\u4e0b:<\/p>\n
import<\/span> requests\nfrom<\/span> crawlers.<\/span>userAgent import<\/span> useragent # \u5bfc\u5165\u81ea\u5df1\u81ea\u5b9a\u4e49\u7684\u7c7b\uff0c\u4e3b\u8981\u4f5c\u7528\u4e3a\u968f\u673a\u53d6user-agent\u7684\u503c<\/span>\nfrom<\/span> lxml import<\/span> etree\nfrom<\/span> Craw_2.<\/span>Test.<\/span>wenti1 import<\/span> IPs # \u5bfc\u5165ip\u4ee3\u7406\u6a21\u5757<\/span>\n\nip=<\/span>IPs(<\/span>)<\/span>\nproxiess=<\/span>ip.<\/span>getUserIps(<\/span>)<\/span>\n\nuserAgent=<\/span>useragent(<\/span>)<\/span>\nurl=<\/span>'http:\/\/www.jituwang.com\/sucai\/meinv-7559813-%d.html'<\/span>\ni=<\/span>1<\/span>\nproxies=<\/span>proxiess.<\/span>pop(<\/span>)<\/span>\nprint<\/span>(<\/span>'\u5f00\u59cb\u722c\u53d6'<\/span>)<\/span>\nwhile<\/span> i<<\/span>501<\/span>:<\/span>\n try<\/span>:<\/span>\n headers =<\/span> { \n <\/span>'user-agent'<\/span>:<\/span> userAgent.<\/span>getUserAgent(<\/span>)<\/span>}<\/span>\n rsp=<\/span>requests.<\/span>get(<\/span>url=<\/span>url%<\/span>(<\/span>i)<\/span>,<\/span>headers=<\/span>headers,<\/span>proxies=<\/span>proxies,<\/span>timeout=<\/span>1<\/span>)<\/span>\n if<\/span> rsp.<\/span>status_code==<\/span>200<\/span>:<\/span>\n print<\/span>(<\/span>proxies,<\/span>i)<\/span>\n html