2017-07-29 64 views
1

當我嘗試做出安全請求時,出現一個奇怪的錯誤,我無法找到該錯誤。我確定這是愚蠢的。請求爲什麼在獲得安全鏈接時失敗

#!/usr/bin/env python 


''' 
this module was designed with web scrapers and web crawlers in mind. 
I find my self writing these functions all the time. I Wrote this model 
to save time. 
''' 

import requests 
import urlparse 
import urllib2 
import urllib 
import re 
import os 
import json 
from fake_useragent import UserAgent 

class InvalidURL(Exception): 
    pass 

class URL(object): 
    '''Common routines for dealing with URLS. 
    ''' 
    def __init__(self, url): 
     '''Setup the initial state 
     ''' 
     self.raw_url = url 
     self.url = urlparse.urlparse(url) 
     self.scheme = self.url.scheme 
     self.domain = self.url.netloc 
     self.path = self.url.path 
     self.params = self.url.params 
     self.query = self.url.query 
     self.fragment = self.url.fragment 


    def __str__(self): 
     ''' This os called when somthing 
     asks for a string representation of the 
     url 
     ''' 
     return self.raw_url 


    def valid(self): 
     """Validate the url. 

     returns True if url is valid 
     and False if it is not 
     """ 
     regex = re.compile(
      r'^(?:http|ftp)s?://' # http:// or https:// 
      r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' 
      r'localhost|' #localhost... 
      r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip 
      r'(?::\d+)?' # optional port 
      r'(?:/?|[/?]\S+)$', re.IGNORECASE) 
     match = regex.match(self.raw_url) 
     if match: 
      return True 


    def unquote(self): 
     """unquote('abc%20def') -> 'abc def'.""" 

     return urllib2.unquote(self.raw_url) 


    def quote(self): 
     """quote('abc def') -> 'abc%20def' 

     Each part of a URL, e.g. the path info, the query, etc., has a 
     different set of reserved characters that must be quoted. 

     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists 
     the following reserved characters. 

     reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 
         "$" | "," 

     Each of these characters is reserved in some component of a URL, 
     but not necessarily in all of them. 

     By default, the quote function is intended for quoting the path 
     section of a URL. Thus, it will not encode '/'. This character 
     is reserved, but in typical usage the quote function is being 
     called on a path where the existing slash characters are used as 
     reserved characters. 
     """ 
     return urllib2.quote(self.raw_url) 


    def parameters(self): 
     """ 
     parse the parameters of the url 
     and return them as a dict. 
     """ 
     return urlparse.parse_qs(self.params) 


    def secure(self): 
     """ Checks if the url uses ssl. """ 
     if self.scheme == 'https': 
      return True 


    def extention(self): 
     """ return the file extention """ 
     return os.path.splitext(self.path)[1] 


    def absolute(self): 
     """ Checks if the URL is absolute. """ 
     return bool(self.domain) 


    def relitive(self): 
     """ Checks if the url is relitive. """ 
     return bool(self.scheme) is False 


    def encode(self, mapping): 
     """Encode a sequence of two-element tuples or dictionary into a URL query string. 

     If any values in the query arg are sequences and doseq is true, each 
     sequence element is converted to a separate parameter. 

     If the query arg is a sequence of two-element tuples, the order of the 
     parameters in the output will match the order of parameters in the 
     input. 
     """ 
     query = urllib.urlencode(mapping) 
     return urlparse.urljoin(self.raw_url, query) 


class Request(object): 


    allow_redirects = True 
    timeout = 5 
    ramdom_useragent = 0 
    verify = False 
    session = requests.Session() 
    stream = True 
    proxies = {} 

    def __init__(self, url): 
     """ Set the inital state """ 
     self.agentHeaders = {} 
     self.url = URL(url) 
     if not self.url.valid(): 
      raise InvalidURL("{} is invalid".format(url)) 

    def stream(self, answer): 
     self.stream = bool(answer) 

    def randomUserAgent(self): 
     """ Set a random User-Agent """ 
     self.setUserAgent(UserAgent().random) 


    def allowRedirects(self, answer): 
     """ Choose whether or not to follow redirects.""" 
     self.allow_redirects = bool(answer) 


    def setUserAgent(self, agent): 
     """ Set the User-Agent """ 
     self.setHeaders('User-Agent', agent) 


    def setHeaders(self, key, value): 
     """ Set custom headers """ 
     self.agentHeaders[key] = value 


    def verify(self, answer): 
     """ Set whether or not to verify SSL certs""" 
     self.verify = bool(answer) 


    def get(self): 
     """Sends a GET request""" 
     return self.session.get(
      url=self.url, 
      headers=self.agentHeaders, 
      allow_redirects=self.allow_redirects, 
      timeout=self.timeout, 
      verify=self.verify, 
      stream=self.stream, 
      proxies=self.proxies 
      ) 


    def head(self): 
     """ Send a head request and return the headers """ 
     return self.session.head(
      self.url, 
      headers=self.agentHeaders, 
      allow_redirects=self.allow_redirects, 
      timeout=self.timeout, 
      verify=self.verify, 
      proxies=self.proxies 
      ).headers 


    def options(self): 
     """ Send a options request and return the options """ 
     return self.session.options(
      self.url, 
      headers=self.agentHeaders, 
      allow_redirects=self.allow_redirects, 
      timeout=self.timeout, 
      verify=self.verify, 
      proxies=self.proxies 
      ).headers['allow'] 


    def json(self): 
     """ 
     Deserialize json data (a ``str`` or ``unicode`` instance 
     containing a JSON document) to a Python object. 
     """ 
     return json.loads(self.text) 


    def headerValue(self, value): 
     """ Get a value from the headers. """ 
     return self.headers().get(value) 



request = Request('https://www.google.com') 
req = request.get() 
print req.text 
print request.head() 
print 
print req.headers.get('link') 
print request.options() 

request = Request('https://www.google.com') 
req = request.get() 

Sat Jul 29 HttpClient python UserAgent.py 
Traceback (most recent call last): 
    File "UserAgent.py", line 234, in <module> 
    req = request.get() 
    File "UserAgent.py", line 192, in get 
    proxies=self.proxies 
    File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 515, in get 
    return self.request('GET', url, **kwargs) 
    File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 502, in request 
    resp = self.send(prep, **send_kwargs) 
    File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 612, in send 
    r = adapter.send(request, **kwargs) 
    File "/home/ricky/.local/lib/python2.7/site-packages/requests/adapters.py", line 407, in send 
    self.cert_verify(conn, request.url, verify, cert) 
    File "/home/ricky/.local/lib/python2.7/site-packages/requests/adapters.py", line 224, in cert_verify 
    if not cert_loc or not os.path.exists(cert_loc): 
    File "/usr/lib/python2.7/genericpath.py", line 26, in exists 
    os.stat(path) 
TypeError: coercing to Unicode: need string or buffer, instancemethod found 

回答

1

Request.verify方法請看:

def verify(self, answer): 
    """ Set whether or not to verify SSL certs""" 
    self.verify = bool(answer) 

Request.verify屬性相沖突。

所以,當你打電話給你Request.get(),你傳遞你的verify實例方法verify參數requests.session.get(..., verify=<your method>),而不是字符串(should point to a certificate bundle)或bool

線索在你的堆棧跟蹤:TypeError: coercing to Unicode: need string or buffer, instancemethod found

解決方案:將您的verify方法重命名爲setVerify(與其他方法一致)。

無關這個問題,我建議你通過擴展requests.Session類來實現你的Request類。這樣你將有更少的方法來定義(如get,head,json等)

相關問題