This repository has been archived on 2024-05-09. You can view files and clone it, but cannot push or open issues or pull requests.
Html2MrakdownOfPython/temp.ipynb

251 lines
9.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: html2text in d:\\conda\\envs\\xl\\lib\\site-packages (2024.2.26)\n",
"Requirement already satisfied: lxml in d:\\conda\\envs\\xl\\lib\\site-packages (5.1.0)\n",
"Collecting pyperclip\n",
" Downloading pyperclip-1.8.2.tar.gz (20 kB)\n",
" Preparing metadata (setup.py): started\n",
" Preparing metadata (setup.py): finished with status 'done'\n",
"Building wheels for collected packages: pyperclip\n",
" Building wheel for pyperclip (setup.py): started\n",
" Building wheel for pyperclip (setup.py): finished with status 'done'\n",
" Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11136 sha256=6e4bca73fa5bfe452a9bf543697a8a05c04392e67b7d3e8fa75ec9b4abba6b75\n",
" Stored in directory: c:\\users\\25086\\appdata\\local\\pip\\cache\\wheels\\70\\bd\\ba\\8ae5c080c895c9360fe6e153acda2dee82527374467eae061b\n",
"Successfully built pyperclip\n",
"Installing collected packages: pyperclip\n",
"Successfully installed pyperclip-1.8.2\n"
]
}
],
"source": [
"# Install every third-party package this notebook imports. %pip (unlike !pip) installs\n",
"# into the environment of the running kernel.\n",
"%pip install html2text lxml pyperclip Pillow requests"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T05:07:12.519422Z",
"start_time": "2024-03-23T05:07:08.129646Z"
}
},
"id": "6d25e442e9ebdea2",
"execution_count": 50
},
{
"cell_type": "code",
"execution_count": 40,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-03-23T04:41:47.923423Z",
"start_time": "2024-03-23T04:41:47.920333Z"
}
},
"outputs": [],
"source": [
"import requests\n",
"from html2text import HTML2Text\n",
"from lxml import etree\n",
"from html import unescape\n",
"import os"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Address of the article page that the following cells download and convert.\n",
"url = \"https://blog.csdn.net/ysblogs/article/details/88530124\""
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T04:41:48.700814Z",
"start_time": "2024-03-23T04:41:48.697800Z"
}
},
"id": "1a9f95e42361f50f",
"execution_count": 41
},
{
"cell_type": "code",
"outputs": [],
"source": [
"\n",
"# Headers sent with the page request: a desktop browser User-Agent and, optionally,\n",
"# a session cookie.\n",
"headers = {\n",
"    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',\n",
"}\n",
"# A logged-in session cookie is a credential and must not be stored in the notebook.\n",
"# Export it as the CSDN_COOKIE environment variable if a cookie is needed; when the\n",
"# variable is unset or empty, no Cookie header is sent.\n",
"csdn_cookie = os.environ.get('CSDN_COOKIE')\n",
"if csdn_cookie:\n",
"    headers['Cookie'] = csdn_cookie\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T04:41:48.958821Z",
"start_time": "2024-03-23T04:41:48.955804Z"
}
},
"id": "34ae80cfe01527bc",
"execution_count": 42
},
{
"cell_type": "code",
"outputs": [],
"source": [
"\n",
"# Download the article page. The timeout keeps a stalled connection from blocking the\n",
"# kernel indefinitely.\n",
"r=requests.get(url, headers=headers, timeout=30)\n",
"# Stop here on an HTTP error status instead of parsing an error page in the next cell.\n",
"r.raise_for_status()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T04:41:49.970131Z",
"start_time": "2024-03-23T04:41:49.450823Z"
}
},
"id": "1266447180328281",
"execution_count": 43
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"look for text...\n"
]
}
],
"source": [
"# Decode the response body and parse it into an lxml element tree.\n",
"html = r.content.decode(\"utf8\")\n",
"tree = etree.HTML(html)\n",
"print(\"look for text...\")\n",
"\n",
"# Article title: first text node of the element with id \"articleContentId\".\n",
"title_nodes = tree.xpath('//*[@id=\"articleContentId\"]/text()')\n",
"title = title_nodes[0]\n",
"\n",
"# Article body: the element with id \"content_views\".\n",
"block = tree.xpath('//*[@id=\"content_views\"]')\n",
"content_node = block[0]\n",
"\n",
"# Serialized HTML of the body, with character references turned back into characters.\n",
"ohtml = unescape(etree.tostring(content_node).decode(\"utf8\"))\n",
"# Plain text of the body, without surrounding whitespace.\n",
"text = content_node.xpath('string(.)').strip()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T04:41:55.502121Z",
"start_time": "2024-03-23T04:41:55.492083Z"
}
},
"id": "6577f8eea4b0bb70",
"execution_count": 45
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"write markdown...\n"
]
}
],
"source": [
"import re\n",
"\n",
"# The article title becomes the file name, so replace characters that are not allowed\n",
"# in file names (path separators, Windows-reserved punctuation, control characters).\n",
"safe_title = re.sub(r'[<>:\"/\\\\|?*\\x00-\\x1f]+', '_', title).strip(' .') or 'untitled'\n",
"\n",
"# Convert only the extracted article block (ohtml) rather than the whole page, and do\n",
"# it before opening the file so a failed conversion does not leave an empty file behind.\n",
"text_maker = HTML2Text()\n",
"md_text = text_maker.handle(ohtml)\n",
"\n",
"with open(f\"{safe_title}.md\", 'w', encoding='utf8') as md_file:\n",
"    # Save the Markdown text.\n",
"    print(\"write markdown...\")\n",
"    md_file.write(md_text)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T04:43:05.647601Z",
"start_time": "2024-03-23T04:43:05.602362Z"
}
},
"id": "bafba43dece27d97",
"execution_count": 49
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"图像已转换为Base64并复制到剪贴板。\n"
]
}
],
"source": [
"import base64\n",
"import io\n",
"\n",
"import pyperclip\n",
"from PIL import Image, ImageGrab\n",
"\n",
"# Read whatever is currently on the clipboard.\n",
"image = ImageGrab.grabclipboard()\n",
"\n",
"# grabclipboard() can return None or a list of file names instead of an image, so only\n",
"# a real Image object is encoded.\n",
"if isinstance(image, Image.Image):\n",
"    # Encode the picture as a PNG file in memory. The raw pixel bytes from tobytes()\n",
"    # carry no size or format information, so their Base64 text could not be decoded\n",
"    # back into an image.\n",
"    buffered = io.BytesIO()\n",
"    image.convert(\"RGB\").save(buffered, format=\"PNG\")\n",
"    base64_image = base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
"\n",
"    # Put the Base64 text back on the clipboard.\n",
"    pyperclip.copy(base64_image)\n",
"    print(\"图像已转换为Base64并复制到剪贴板。\")\n",
"else:\n",
"    print(\"剪贴板中没有图像。\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-23T05:10:21.387867Z",
"start_time": "2024-03-23T05:10:21.356022Z"
}
},
"id": "df52d0199e805945",
"execution_count": 58
},
{
"cell_type": "code",
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "50799972739dbff4"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}