凌的博客

您现在的位置是: 首页 > 学无止境 > python > 

python

Python 正则表达式规则测试 GUI（PyQt5）

2019-09-07 python 1573

1.jpg

import sys
from PyQt5.QtWidgets import QMainWindow, QWidget, QApplication, QPushButton, QFileDialog, QLineEdit, QHBoxLayout, \
    QVBoxLayout, QTextEdit, QDesktopWidget, QLabel, QDateTimeEdit, QCheckBox, QTableView, QHeaderView, \
    QAbstractItemView, QMessageBox
from PyQt5.QtCore import QThread, pyqtSignal, QObject, QDateTime, Qt
from PyQt5.QtGui import QStandardItemModel, QStandardItem
import os
import time
import re, requests


class CombWidget(QObject):
    """Builds container widgets that arrange a list of child widgets in a box layout."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def _wrap(layout):
        """Return a new QWidget that owns *layout*."""
        container = QWidget()
        container.setLayout(layout)
        return container

    def combHBox(self, lst: list):
        """Return a widget holding the given widgets side by side.

        :param lst: ``(widget, stretch)`` pairs, added left to right.
        """
        row = QHBoxLayout()
        for widget, stretch in lst:
            row.addWidget(widget, stretch)
        return self._wrap(row)

    def combHBoxAlign(self, lst: list):
        """Return a widget holding the given widgets side by side with alignment.

        :param lst: ``(widget, stretch, alignment)`` triples, added left to right.
        """
        row = QHBoxLayout()
        for widget, stretch, alignment in lst:
            row.addWidget(widget, stretch, alignment)
        return self._wrap(row)

    def combVBox(self, lst: list):
        """Return a widget holding the given widgets stacked vertically.

        :param lst: ``(widget, stretch)`` pairs, added top to bottom.
        """
        column = QVBoxLayout()
        for widget, stretch in lst:
            column.addWidget(widget, stretch)
        return self._wrap(column)


# Number of failed attempts made by the most recent get_content() call.
error_index = 0


def get_content(url, charset="utf-8", timeout=30):
    """Download *url* with a GET request and return the body as text.

    The request is retried after a 2 second pause when it raises; after more
    than three failures the function gives up.

    :param url: address to fetch.
    :param charset: codec used to decode the response bytes; bytes that are
        invalid for the codec are replaced rather than treated as a failure.
    :param timeout: timeout in seconds passed to ``requests.get``.
    :return: the decoded body, or ``""`` when the request keeps failing, the
        status code is not 200, or *charset* is not a known codec.
    """
    global error_index
    max_errors = 3
    headers = {"User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0",
               "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
               "Accept-Encoding": "deflate",
               "Content-Type": "application/x-www-form-urlencoded",
               }

    # Every call starts with a fresh error budget; otherwise a few failures
    # early in the session would make all later calls give up immediately.
    error_index = 0
    while True:
        try:
            r = requests.get(url, timeout=timeout, headers=headers)
            break
        except Exception as e:
            # Broad on purpose: this runs in a worker thread and the only
            # statement guarded is the network call itself.
            print("Error:", str(e))
            error_index += 1
            if error_index > max_errors:
                print("%d次错误退出" % error_index)
                return ""
            print("2秒后重新链接。。。 error_index: %s" % (error_index - 1))
            time.sleep(2)

    if r.status_code != 200:
        print("Error:", r.status_code)
        return ""

    try:
        # errors="replace": a wrong charset should still show the page,
        # not trigger another download.
        return r.content.decode(charset, errors="replace")
    except LookupError as e:
        # Unknown codec name.
        print("Error:", str(e))
        return ""


class WorkThread(QThread):
    """Worker thread that downloads ``url`` and reports the page text via a signal."""

    # Emitted from run() with the downloaded text ("" when nothing was fetched).
    triggerHtml = pyqtSignal(str)
    # Address to download; the owner assigns it before calling start().
    url = ""

    def run(self):
        """Fetch ``self.url`` as UTF-8 text and emit it through ``triggerHtml``."""
        print("url:%s" % self.url)
        content = get_content(self.url, charset="utf-8")
        # The signal is declared with a str argument, so a None result
        # (no content available) must be normalised before emitting.
        self.triggerHtml.emit(content or "")


class Testpattern(QMainWindow):
    """Main window for testing regular expressions against a downloaded page.

    Layout, top to bottom: URL row, raw page text, "content" rule row and its
    match, "detail" rule row and the list of detail matches.
    """

    # Named groups that patDetail reports, in display order.
    _DETAIL_GROUPS = ("content", "title", "img", "url")

    def __init__(self):
        super().__init__()
        self.initUI()

    def _snippetButton(self, label, editor, snippet):
        """Return a button labelled *label* that inserts *snippet* at *editor*'s cursor."""
        button = QPushButton(label)
        button.clicked.connect(lambda: editor.insertPlainText(snippet))
        return button

    def initUI(self):
        """Create all widgets, wire their signals and lay out the window."""
        # NOTE: this rebinds ``statusBar`` on the instance from the accessor
        # method to the status bar object; the rest of the class relies on
        # using it as an attribute.
        self.statusBar = self.statusBar()
        self.setStatusBar(self.statusBar)

        self.statusBar.showMessage("初始化 test pattern")

        # --- text fields -------------------------------------------------
        self.url_txt = QLineEdit("http://www.jiuaitu.com/learn/")
        self.url_txt.setPlaceholderText("请输入网址")
        self.ret_text = QTextEdit("")
        self.ret_text.setPlaceholderText("网页内容")

        self.contentpattern_text = QTextEdit()
        self.contentpattern_text.setPlainText(r'<article>(?P<content>.+?)</article>')
        self.contentpattern_text.setPlaceholderText("内容规则")
        self.contenthtml_text = QTextEdit("")
        self.contenthtml_text.setPlaceholderText("匹配内容")

        # Example detail rule:
        # <a href="(?P<url>[^>]+?)"><img src="(?P<img>[^>]+?)" alt="(?P<title>.{1,100}?)">
        self.pattern_text = QTextEdit("")
        self.pattern_text.setPlainText('<img src="(?P<img>[^>]+?)" alt="(?P<title>.{1,100}?)">')
        self.pattern_text.setPlaceholderText("详细规则")
        self.html_text = QTextEdit("")
        self.html_text.setPlaceholderText("匹配内容")

        # --- action buttons ----------------------------------------------
        testcontent_btn = QPushButton("匹配")
        testcontent_btn.clicked.connect(self.patContent)
        test_btn = QPushButton("匹配")
        test_btn.clicked.connect(self.patDetail)
        gethtml_btn = QPushButton("获取网页内容")
        gethtml_btn.clicked.connect(self.getHtml)

        # --- snippet buttons (insert a regex fragment into a rule editor) --
        comb = CombWidget()
        patbtns = comb.combVBox([
            (self._snippetButton("content", self.contentpattern_text, '(?P<content>.+?)'), 1),
        ])

        dpatbtns = comb.combVBox([
            (self._snippetButton(label, self.pattern_text, snippet), 1)
            for label, snippet in (
                ("content", '(?P<content>.+?)'),
                ("title", '(?P<title>.{1,100}?)'),
                ("img", '(?P<img>[^>]+?)'),
                ("url", '(?P<url>[^>]+?)'),
            )
        ])

        # Raw strings: "\s" in a normal literal is an invalid escape sequence.
        dspacebtns = comb.combVBox([
            (self._snippetButton(fragment, self.pattern_text, fragment), 1)
            for fragment in ('[^>]+?', '.+', '.*', r'\s+', r'\s*')
        ])

        # --- rows ----------------------------------------------------------
        v = QVBoxLayout()
        v.addWidget(comb.combHBox([(self.url_txt, 12), (gethtml_btn, 2), ]), 1)
        v.addWidget(comb.combHBox([(self.ret_text, 12), ]), 3)

        v.addWidget(
            comb.combHBoxAlign([(self.contentpattern_text, 12, Qt.AlignVCenter), (patbtns, 1, Qt.AlignTop),
                                (testcontent_btn, 2, Qt.AlignTop), ]),
            1)

        v.addWidget(comb.combHBox([(self.contenthtml_text, 12), ]), 5)

        v.addWidget(comb.combHBoxAlign(
            [(self.pattern_text, 12, Qt.AlignVCenter), (dpatbtns, 1, Qt.AlignTop), (dspacebtns, 1, Qt.AlignTop),
             (test_btn, 2, Qt.AlignTop), ]),
            1)

        v.addWidget(comb.combHBox([(self.html_text, 12), ]), 5)

        vwg = QWidget()
        vwg.setLayout(v)

        # Start time of the current request; refreshed in getHtml().
        self.exec_time = time.time()

        self.setCentralWidget(vwg)
        # Put the cursor in the URL field. Done once the field is part of the
        # window, and without passing a bool where a focus reason is expected.
        self.url_txt.setFocus()

        self.workThread = WorkThread()
        self.workThread.triggerHtml.connect(self.showResult)

        self.setGeometry(300, 300, 800, 800)
        self.center()
        self.setWindowTitle("测试规则")

    def patContent(self):
        """Apply the content rule to the page text and show its ``content`` group.

        Example rule: ``<div class="blogs-list">(?P<content>.+)</div>``.
        Errors (invalid pattern, missing ``content`` group) are shown in the
        result field instead of being raised.
        """
        print("匹配内容")
        try:
            pat = self.contentpattern_text.toPlainText()
            content = self.ret_text.toPlainText()
            print("pattren:%s" % pat)
            m = re.search(pat, content, re.S | re.I)

            if m is None:
                # Do not leave a previous match on screen: patDetail would
                # silently run against that stale text.
                self.contenthtml_text.setPlainText("")
                self.statusBar.showMessage("未匹配到内容")
                return

            # group() yields None when the group took no part in the match.
            self.contenthtml_text.setPlainText(m.group("content") or "")

        except Exception as e:
            # Slot boundary: report the problem in the UI rather than let it
            # escape into the Qt event loop.
            self.contenthtml_text.setPlainText(str(e))
            print(str(e))

    def patDetail(self):
        """Apply the detail rule repeatedly and list the named groups of each match.

        The rule is run against the content match when there is one, otherwise
        against the whole page text. Only the groups ``content``, ``title``,
        ``img`` and ``url`` are listed. Errors are shown in the result field.
        """
        print("匹配详细")

        try:
            pat = self.pattern_text.toPlainText()
            print("pattren:%s" % pat)
            if not pat:
                # An empty pattern matches at every position of the text.
                self.html_text.setPlainText("")
                self.statusBar.showMessage("规则不能为空")
                return

            regex = re.compile(pat, re.S | re.I)
            # Ask the compiled pattern which groups exist instead of searching
            # the pattern text for "(?P<name>".
            pat_names = [name for name in self._DETAIL_GROUPS if name in regex.groupindex]
            print("pattren:%s" % pat_names)

            content = self.contenthtml_text.toPlainText()
            if content == "":
                content = self.ret_text.toPlainText()

            separator = "\n---------------------------------------------------------\n"
            parts = []
            for dt in regex.finditer(content):
                parts.extend("<%s>:%s \n" % (name, dt.group(name)) for name in pat_names)
                parts.append(separator)

            self.html_text.setPlainText("".join(parts))

        except Exception as e:
            # Slot boundary: report the problem in the UI rather than let it
            # escape into the Qt event loop.
            self.html_text.setPlainText(str(e))
            print(str(e))

    def showResult(self, text):
        """Slot for ``WorkThread.triggerHtml``: show the page text and the elapsed time."""
        elapsed = time.time() - self.exec_time
        if text:
            self.statusBar.showMessage("请求成功 耗时 %.2f 秒" % elapsed)
        else:
            self.statusBar.showMessage("请求失败或内容为空 耗时 %.2f 秒" % elapsed)
        self.ret_text.setPlainText(text)

    def getHtml(self):
        """Start downloading the address typed into the URL field."""
        self.url = self.url_txt.text().strip()
        if self.url == "":
            QMessageBox.warning(self, "提示", "URL网址不能为空", QMessageBox.Ok)
            return

        if self.workThread.isRunning():
            # Changing url/exec_time now would corrupt the request in flight.
            self.statusBar.showMessage("正在请求中，请稍候")
            return

        self.statusBar.showMessage("请求URL:%s" % self.url)
        self.workThread.url = self.url
        self.exec_time = time.time()
        self.workThread.start()

    def center(self):
        """Move the window so that it is centred on the available desktop area."""
        qr = self.frameGeometry()
        cp = QDesktopWidget().availableGeometry().center()
        qr.moveCenter(cp)
        self.move(qr.topLeft())


if __name__ == "__main__":
    # Script entry point: create the Qt application, show the tester window
    # and exit with the status returned by the event loop.
    qt_app = QApplication(sys.argv)
    window = Testpattern()
    window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


文章评论

0条评论