python-http短连接-socket长连接 模拟用户操作(爬虫?)

本文分享了一个关于如何使用Python模拟用户操作进行自动化任务的实战经验,包括HTTP请求的发送与接收、WebSocket通信及长连接的维护等内容。作者通过一个具体的聊天室机器人项目,详细介绍了从登录认证到消息收发的具体实现过程。

我平时在玩一个很简单的网上匿名聊天室(2333)。有一天在里面看到一个用户,其实是个 robot,能够自动回复,还支持点歌之类的简单命令操作,所以也想自己搞一个,于是开始了为期一周的爬坑之旅。现把总结的流程展示如下:
git路径
1. HTTP 访问(GET/POST):获取 HTML 或 JSON 字符串,并自动存取 session/cookie。
使用 urllib、urllib2、cookielib 等网络模块封装了基本的 HTTP 工具,实现模拟浏览器登录的效果:访问地址之后自动存入 session/cookie,接着访问后续网页时自动携带新的 cookie。

#!/usr/bin/python
#-*- coding:utf-8 -*-  
import urllib
import urllib2
import cookielib
import tool
import traceback

class Http:
    """Small cookie-aware HTTP client built on ``urllib2``.

    All requests go through a single opener that owns a ``CookieJar``, so
    cookies set by one response are sent automatically with later requests.
    Request failures are logged and reported through the sentinel string
    ``"error"`` instead of an exception.
    """

    def __init__(self):
        self.cookie = cookielib.CookieJar()
        self.cookieHander = urllib2.HTTPCookieProcessor(self.cookie)
        self.opener = urllib2.build_opener(self.cookieHander)

        header = {
            "User-Agent": "Mozilla/6.0",
        }
        # The opener expects its default headers as a list of (name, value).
        self.opener.addheaders = list(header.items())

    def out(self, *obj):
        """Print the given values on one line, prefixed with ``http.``.

        The values are joined with spaces. Printing ``str()`` of the whole
        argument tuple (as before) showed a tuple repr, which escaped the
        newlines of tracebacks and any non-ASCII text.
        """
        print("http." + " ".join(str(item) for item in obj))

    def getCookie(self):
        """Return the cookies currently held as a ``{name: value}`` dict."""
        return {item.name: item.value for item in self.cookie}

    def show(self, response):
        """Log the stored cookies plus the status and headers of *response*."""
        tool.line()
        try:
            self.out("Cookie:")
            for item in self.cookie:
                self.out('##' + item.name + ':' + item.value)

            self.out("Code: " + str(response.getcode()))
            self.out("Res : " + str(response.msg))
            self.out("Headers : ")
            self.out(response.headers)
        except Exception:
            # ``response`` may be the "error" sentinel string, which has none
            # of the attributes read above; log instead of failing.
            self.out(traceback.format_exc())
        tool.line()

    def _open(self, url, data=None):
        """Open *url* through the shared opener.

        Returns the response object, or the string ``"error"`` when the
        request raises (the traceback is logged).
        """
        try:
            return self.opener.open(url, data)
        except Exception:
            self.out(traceback.format_exc())
            return "error"

    # Cookies from Set-Cookie headers are stored automatically after a request.
    def doGet(self, url):
        """Send a GET request; return the response or ``"error"``."""
        return self._open(url)

    def doPost(self, url=None, postData=None):
        """Send *postData* url-encoded as a POST, or a GET when it is None.

        Returns the response object, or ``"error"`` if encoding the data or
        opening the url raises.
        """
        try:
            body = None if postData is None else urllib.urlencode(postData)
        except Exception:
            self.out(traceback.format_exc())
            return "error"
        return self._open(url, body)

    def do(self, url=None, postData=None):
        """Validate *url* and dispatch to :meth:`doPost`.

        Returns ``"error, url=null?"`` when *url* is None or empty.
        """
        if url is None or url == "":
            return "error, url=null?"
        return self.doPost(url, postData)

    def doJson(self, url="", postData=None):
        """Request *url* and parse the body with ``tool.toJson``.

        Returns the parsed object, or an empty dict when the body could not
        be read as a ``str``.
        """
        res = {}
        responce = self.do(url, postData)
        if isinstance(responce, str):
            # Error sentinel from do()/doPost(); handed to the parser as-is.
            jsonStr = responce
        else:
            try:
                jsonStr = responce.read()
            finally:
                # Release the connection once the body has been consumed.
                responce.close()
        if isinstance(jsonStr, str):
            res = tool.toJson(jsonStr.strip())
        else:
            self.out("responce 读取失败,url:" + str(url) + " data:" + str(postData))
        return res

2.解析json/html字符串,取出表单提交需要用到的token之类的参数
HTML 使用 BeautifulSoup 解析 DOM 并提取所需数据;JSON 使用 json 模块解析,并递归地把解析得到的 dict 对象编码为可辨识的 UTF-8 编码。

3.长连接socket/webSocket/socketIo的通信
利用通过 HTTP 获取到的 cookie 键值对、HTML 中的 token 参数、socket 服务器地址配置等数据建立长连接,实现发送消息、接收推送的功能,并处理特定的数据事件。
下面是 socketIo 工具类。调用者需要先在任意位置初始化并 connect,之后必须在主线程中调用 waitRead,否则无法收到推送消息。

from socketIO_client import SocketIO

import time
import tool
import traceback


class Socket:
    """Thin wrapper around a ``socketIO_client.SocketIO`` connection.

    Callers create the object, call :meth:`connect`, register handlers with
    :meth:`on`, and then run :meth:`waitRead` to receive pushed events.
    """

    def __init__(self):
        self.socket = None
        # Endpoint of the last connect(); lets send() reconnect on demand.
        self.url = None
        self.port = None
        # True while a connection is considered open.
        self.ifOn = False

    def connect(self, url, port):
        """Drop any current connection and open a new one to *url*:*port*."""
        self.close()
        self.url = url
        self.port = port
        self.out("Connect url:" + self.url + " port:" + str(self.port))
        self.socket = SocketIO(self.url, port=self.port)
        self.out("Connect over ")
        self.ifOn = True

    def emit(self, type, data, callback=None):
        """Alias of :meth:`send`."""
        return self.send(type, data, callback)

    def send(self, type, data, callback=None):
        """Emit event *type* with payload *data*, logging what is sent.

        When no connection is open, the last endpoint given to
        :meth:`connect` is reopened first. Handlers registered through
        :meth:`on` belong to the previous connection object and are not
        re-registered here.

        Raises:
            RuntimeError: if :meth:`connect` was never called, so there is
                no endpoint to reconnect to.
        """
        if self.socket is None:
            if self.url is None:
                raise RuntimeError(
                    "socket is not connected; call connect(url, port) first")
            self.connect(self.url, self.port)
        tool.line()
        self.out("send:")
        self.out("type:" + type)
        self.out("data:" + str(data))

        self.socket.emit(type, data, callback)
        tool.line()

    def close(self):
        """Forget the current connection, if any, and pause for one second."""
        self.ifOn = False
        if self.socket is not None:
            self.out("Close connect!")
            # NOTE(review): the connection object is only dropped, never
            # explicitly disconnected (the disconnect() call was disabled in
            # the original) - confirm whether that is still intended.
            self.socket = None
            time.sleep(1)

    def waitRead(self, onException):
        """Poll the connection forever so pushed events reach their handlers.

        This call never returns. While a connection exists it waits on it in
        one-second slices; any exception is logged and passed to
        *onException*. While there is no connection it sleeps five seconds
        between checks.
        """
        while True:
            if self.ifOn:
                self.out("开启等待读取!")
            while self.socket:
                try:
                    self.socket.wait(1)
                except Exception as e:
                    self.out(traceback.format_exc())
                    onException(e)
                time.sleep(1)
            time.sleep(5)

    def on(self, item, method):
        """Register *method* as the handler for event *item*."""
        self.socket.on(item, method)

    def out(self, obj):
        """Print *obj* prefixed with ``socketio.``."""
        print("socketio." + str(obj))

实际案例:

#!/usr/bin/python
#-*- coding:utf-8 -*-

import re
import sys
import time
import json
import traceback
import uuid
import tool
import BeautifulSoup
from socketIO_client import SocketIO
from socketIo import Socket

from http import Http
from robot import Robot
from tool import ThreadRun
from python_sqlite import Database



# cochat 自动化
class AutoCochat:
    def __init__(self, name="Test", id="1231231812", pwd="adf"):
        self.id = id
        self.pwd = pwd
        self.name = name 
        self.http = Http() 
        self.socket = Socket()
        self.onConnect = False
        self.ifOk = False
        self.db.execute(
            ''' 
            create table if not exists music(
                url         text primary key,
                name        text,
                fromName    text,
                count       text
            )
            ''' )

        return
    # 日志输出
    def out(self, obj):
        print(time.strftime("%Y%m%d %H:%M:%S", time.localtime()) + "." + self.name + "." + str(obj))
        return

    def login(self):
        i = 0
        while(self.ifOk == False or i <= 0):
            try:
                self.whileLogin()
                i = i + 1
            except Exception as e:
                self.out(traceback.format_exc())
            self.out("登录异常,5s后重试 try:" + str(i))
            time.sleep(5)
        return

    # 认证登录
    def whileLogin(self):
        self.ifOk = False
        if(self.onConnect):
            self.out("已经在尝试登录")
            return
        self.socket.close()
        self.onConnect = True
        self.out("尝试登录:")
        obj = self.http.doJson("http://picc.cochat.cn/SY_ORG_LOGIN.login.do?DESKTOP_OS=Win10&USER_LAST_BROWSER=Win32&USER_LAST_CLIENT=2.5.1&USER_LAST_OS=DESKTOP&USER_LAST_PCNAME=%7B%7D",{
            "CONF_VARS":"*",
            "ORG_VARS":"true",
            "logintype":"mobile",
            "id":self.id,
            "password":self.pwd
        })
        if(obj.get("error","") != ""):
            self.onConnect = False
            return
        self.loginUser = obj
        token = obj.get("USER_TOKEN", "")
        self.out("登录结果 token:" + token)
        urlWithPort = obj.get("CONF_VARS", {}).get("@C_SY_COMM_SOCKET_SERV_V1.0@", "http://cochat.cn:9091")
        uus = urlWithPort.split(':')
        port = int(uus[2])
        url = uus[1][2:999] #ccc.cn 不需要ws http 只需要ip 域名

        self.socketServerUrl = urlWithPort
        self.socketUrl = url
        self.socketPort = port
        self.showUser()

        self.config = {
            "transports":['websocket', 'polling'],  # websocket优先
            "timeout":5 * 1000, # 超时时间
            "forceNew": True,
            "reconnection" : False
        };
        self.out("socket开始")
        self.socket.connect(url,port) # , params=self.config)
        self.out("socket连接完成,开始初始化事件")

        socketMsgTypes = ("connect", "disconnect","message", "event")
        for item in socketMsgTypes:
            if(hasattr(self, item)):
                method = getattr(self, item)
                if(callable(method)):
                    self.socket.on(item, method)
                else:
                    self.out("变量而非方法" + item + "回调?")
            else:
                self.out("属性" + item + "不存在,是否写错了名字?")
        # self.socket.on("message", self.message)

        self.out("socket初始化事件完成,开始发送认证")
        self.data = {
            "userName":obj.get("USER_CODE", ""),
            "displayName": "ccc",# tool.encode(obj.get("ORG_VARS", {}).get("@USER_NAME@", "") ),
            "odept":obj.get("ORG_VARS", {}).get("@ODEPT_CODE@", ""),
            "token":obj.get("USER_TOKEN", ""),
            "uuid":"" + str(uuid.uuid1()),
            "version":obj.get("USER_CODE", "") + "_LAST_MSG"
        }
        self.out(self.data)
        self.socket.emit('loginv17', self.data, self.onSocketLogin)
        self.out("已发送认证信息")
        self.ifOk = True
        self.onConnect = False
        return
    def connect(self, *args):
        print("connect")
        print(args)
        # self.login()
        return
    def disconnect(self, *args):
        print("disconnect")
        print(args)
        self.login()
        return 
    def onException(self, *args):
        tool.line()
        self.out("onexception")
        print(args)
    def turnArray(self, args):
        if(args and len(args) == 1 and (type(args[0]) is list) ):
            res = args[0]
        else:
            res = list(args)
        return res
    def message(self, *args): # 普通消息
        try:

            # tool.line()
            args = self.turnArray(args) 
            if(len(args) == 2):
                data = args[0]
                data = tool.toJson(data)
                fro = data.get("from", {})
                to = data.get("to", {})
                contact = data.get("contact", {})
                msg = data.get("body")
                fullId = contact.get("fullId")
                sessionName = contact.get("nickName", "")

                uid = self.data.get("uuid")
                tTag = data.get("timeMillis", tool.getNowTime())
                self.out("Msg:" + fro.get("nickName","from") + ">>" + msg + ">>" + to.get("nickName","to") + " time:" + data.get("time"))

                self.socket.emit("updateMsgStatus", {
                    "messages":data.get("id","")
                })

                # obj = {}
                # # if(contact.get("type") == "GROUP"):
                # msg = self.robot.do(msg, fro.get("nickName"))
                # obj["body"] = msg #"666" + str(tool.getNowTime())
                # obj["bodyType"] = "text"
                # obj["clientId"] = str(uuid.uuid1())
                # obj["retry"] = 1
                # obj["from"] = {}
                # obj["from"]["fullId"] = "u__" + self.data.get("userName")
                # obj["from"]["id"] = self.data.get("userName")
                # obj["from"]["nickName"] = "fromnickname"
                # obj["to"] = {}
                #
                # if(contact.get("type") == "GROUP"):
                #     obj["to"]["fullId"] = contact.get("fullId")
                # else:
                #     obj["to"]["fullId"] = fro.get("fullId")
                # obj["to"]["nickName"] = "tonickname"
                #
                # obj["from"]["nickName"] = "from-nickName"
                # obj["to"]["nickName"] = "to-nickName"
                #
                # print(obj)
                # self.socket.send("message", obj)
            else:
                self.out("args len 不合理数据:" + str(args)[0:40])
                # print(args)
                # tool.line()
        except Exception as e:
            self.out(traceback.format_exc())

        return
    def event(self, *args): # 事件消息 群创建?
        tool.line()
        print("event")
        print(str(args)[0:40])
        tool.line()
        return
    def onSocketLogin(self, *data):
        self.out("socket登录回调:")
        print(data)
        return

    def send(self, type, data):
        self.socket.send(type, data)
        return


    def showUser(self):
        tool.line()
        obj = self.loginUser
        user = obj.get("ORG_VARS", {})
        self.out(user.get("@USER_NAME@", ""))
        self.out(user.get("@USER_POST@", ""))
        self.out(user.get("@LOGIN_NAME@", ""))
        self.out("USER_CODE:" + obj.get("USER_CODE", ""))
        self.out("USER_TOKEN:" + user.get("UESR_TOKEN", ""))
        self.out("socketServerUrl:" + self.socketServerUrl)
        self.out("to url:" + self.socketUrl)
        self.out("to port:" + str(self.socketPort))

        tool.line()
        return
if __name__ == '__main__':
    # AutoCochat defines no test() method, so the old call failed at once.
    # Log in, then keep polling the socket so pushed messages reach the
    # registered handlers.
    obj = AutoCochat("Test", "1828221", "654321")
    obj.login()
    obj.socket.waitRead(obj.onException)

自从学会了用代码模拟用户操作 HTTP 和 socket,就像打开了新世界的大门,
总想把所有重复的浏览器操作、程序操作都用代码自动化处理(比如:要是能用 Python 脚本定期编写并发布博客就好了 (⊙﹏⊙))。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值