diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/temp.ipynb b/temp.ipynb new file mode 100644 index 0000000..3812405 --- /dev/null +++ b/temp.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: html2text in d:\\conda\\envs\\xl\\lib\\site-packages (2024.2.26)\n", + "Requirement already satisfied: lxml in d:\\conda\\envs\\xl\\lib\\site-packages (5.1.0)\n", + "Collecting pyperclip\n", + " Downloading pyperclip-1.8.2.tar.gz (20 kB)\n", + " Preparing metadata (setup.py): started\n", + " Preparing metadata (setup.py): finished with status 'done'\n", + "Building wheels for collected packages: pyperclip\n", + " Building wheel for pyperclip (setup.py): started\n", + " Building wheel for pyperclip (setup.py): finished with status 'done'\n", + " Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11136 sha256=6e4bca73fa5bfe452a9bf543697a8a05c04392e67b7d3e8fa75ec9b4abba6b75\n", + " Stored in directory: c:\\users\\25086\\appdata\\local\\pip\\cache\\wheels\\70\\bd\\ba\\8ae5c080c895c9360fe6e153acda2dee82527374467eae061b\n", + "Successfully built pyperclip\n", + "Installing collected packages: pyperclip\n", + "Successfully installed pyperclip-1.8.2\n" + ] + } + ], + "source": [ + "!pip install html2text lxml pyperclip\n", + "!pip install Pillow pyperclip" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T05:07:12.519422Z", + "start_time": "2024-03-23T05:07:08.129646Z" + } + }, + "id": "6d25e442e9ebdea2", + "execution_count": 50 + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2024-03-23T04:41:47.923423Z", + "start_time": "2024-03-23T04:41:47.920333Z" + } + }, + "outputs": [], + "source": [ + "import requests\n", + "from html2text import HTML2Text\n", + "from lxml import etree\n", + "from html import unescape\n", + "import os" + ] + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "url = 'https://blog.csdn.net/ysblogs/article/details/88530124'" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T04:41:48.700814Z", + "start_time": "2024-03-23T04:41:48.697800Z" + } + }, + "id": "1a9f95e42361f50f", + "execution_count": 41 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "\n", + "headers = {\n", + " 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',\n", + " 'Cookie':'uuid_tt_dd=10_18798875550-1703252547299-411616; UserName=Shen_Mac; UserInfo=9286fcc0083a4ad1b4c60c58e55f1895; UserToken=9286fcc0083a4ad1b4c60c58e55f1895; UserNick=Shen_Mac; AU=295; UN=Shen_Mac; BT=1703497224926; p_uid=U010000; Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%220%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22Shen_Mac%22%2C%22scope%22%3A1%7D%7D; c_adb=1; historyList-new=%5B%5D; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18798875550-1703252547299-411616!5744*1*Shen_Mac; __gads=ID=13b084a0d17ef9c6:T=1711116802:RT=1711116802:S=ALNI_MbkIcB7VJ9bHN2usHc1aet0Bp4nsw; __gpi=UID=00000d566881fb24:T=1711116802:RT=1711116802:S=ALNI_MYqFMHAbLHe1BhJTcDnS7bonchWMw; __eoi=ID=346b3efd1cf0c8ad:T=1711116802:RT=1711116802:S=AA-Afjaui7q1ow-WAFxJ88dWJUAC; _ga=GA1.2.582472258.1711123815; _gid=GA1.2.114771557.1711123815; _ga_7W1N0GEY1P=GS1.1.1711123814.1.1.1711123884.60.0.0; c_segment=0; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1711013381,1711116798,1711123812,1711164937; dc_sid=ddb746a22f3aef0ce41bb5e03a472d00; dc_session_id=10_1711166928729.783391; c_first_ref=www.bing.com; c_dl_fref=https://blog.csdn.net/naer_chongya/article/details/131665892; _clck=5u5e4g%7C2%7Cfkb%7C0%7C1523; c_dl_prid=1711167202984_265612; c_dl_rid=1711167374683_557146; c_dl_fpage=/download/weixin_42144086/19393382; c_dl_um=distribute.pc_relevant.none-task-blog-2%7Edefault%7Ebaidujs_baidulandingword%7Edefault-0-129448279-blog-131665892.235%5Ev43%5Epc_blog_bottom_relevance_base3; c_utm_medium=distribute.pc_relevant.none-task-download-2%7Edefault%7EBlogCommendFromBaidu%7EAntiPaid-6-19393382-blog-131665892.235%5Ev43%5Epc_blog_bottom_relevance_base3; c_utm_relevant_index=10; _clsk=o44fc0%7C1711167378071%7C2%7C0%7Cn.clarity.ms%2Fcollect; c_pref=https%3A//blog.csdn.net/naer_chongya/article/details/131665892; c_ref=https%3A//www.bing.com/; firstDie=1; creativeSetApiNew=%7B%22toolbarImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20231011044944.png%22%2C%22publishSuccessImg%22%3A%22https%3A//img-home.csdnimg.cn/images/20231011045003.png%22%2C%22articleNum%22%3A0%2C%22type%22%3A0%2C%22oldUser%22%3Afalse%2C%22useSeven%22%3Atrue%2C%22oldFullVersion%22%3Afalse%2C%22userName%22%3A%22Shen_Mac%22%7D; c_first_page=https%3A//blog.csdn.net/ysblogs/article/details/88530124; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1711167957; SidecHatdocDescBoxNum=true; log_Id_click=670; waf_captcha_marker=fbbc908b3209860a12a2f854c0aded8eca8c16814748fcb2bd2f1879052f07ab; c_dsid=11_1711168032071.771919; c_page_id=default; dc_tos=sas9pc; log_Id_pv=741; log_Id_view=27511'\n", + "}\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T04:41:48.958821Z", + "start_time": "2024-03-23T04:41:48.955804Z" + } + }, + "id": "34ae80cfe01527bc", + "execution_count": 42 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "\n", + "r=requests.get(url, headers=headers)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T04:41:49.970131Z", + "start_time": "2024-03-23T04:41:49.450823Z" + } + }, + "id": "1266447180328281", + "execution_count": 43 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "look for text...\n" + ] + } + ], + "source": [ + "html = r.content.decode(\"utf8\")\n", + "# print(html)\n", + "tree = etree.HTML(html)\n", + "print(\"look for text...\")\n", + "# 找到需要的html块\n", + "title = tree.xpath('//*[@id=\"articleContentId\"]/text()')[0]\n", + "block = tree.xpath('//*[@id=\"content_views\"]')\n", + "# html\n", + "ohtml = unescape(etree.tostring(block[0]).decode(\"utf8\"))\n", + "# 纯文本\n", + "text = block[0].xpath('string(.)').strip()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T04:41:55.502121Z", + "start_time": "2024-03-23T04:41:55.492083Z" + } + }, + "id": "6577f8eea4b0bb70", + "execution_count": 45 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "write markdown...\n" + ] + } + ], + "source": [ + "with open(f\"{title}.md\", 'w', encoding='utf8') as md_file:\n", + " # 保存markdown\n", + " print(\"write markdown...\")\n", + " text_maker = HTML2Text()\n", + " # md转换\n", + " md_text = text_maker.handle(html)\n", + " md_file.write(md_text)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T04:43:05.647601Z", + "start_time": "2024-03-23T04:43:05.602362Z" + } + }, + "id": "bafba43dece27d97", + "execution_count": 49 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "图像已转换为Base64并复制到剪贴板。\n" + ] + } + ], + "source": [ + "from PIL import ImageGrab\n", + "import pyperclip\n", + "import base64\n", + "\n", + "# 从剪贴板中获取图像\n", + "image = ImageGrab.grabclipboard()\n", + "\n", + "\n", + "if image is not None:\n", + " # 将图像转换为Base64编码\n", + " buffered = image.convert(\"RGB\").tobytes()\n", + " base64_image = base64.b64encode(buffered).decode(\"utf-8\")\n", + "\n", + " # 将Base64编码的图像放入剪贴板\n", + " pyperclip.copy(base64_image)\n", + " print(\"图像已转换为Base64并复制到剪贴板。\")\n", + "else:\n", + " print(\"剪贴板中没有图像。\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-03-23T05:10:21.387867Z", + "start_time": "2024-03-23T05:10:21.356022Z" + } + }, + "id": "df52d0199e805945", + "execution_count": 58 + }, + { + "cell_type": "code", + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "50799972739dbff4" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}