以前做报表从Mysql数据里面提取数据很简单,几条简单的SHELL脚本就可以实现该功能,因为是自己使用,所以对于格式无所谓,以前使用的都是.csv格式,因为其是以逗号区分的,方便脚本处理。这次不一样了,虽然这次接到需求也是从Mysql中导出数据,但是!!导出文件的格式必须是.XLSX这是微软EXCEL软件自有的格式,包含特殊的文件头,所以之前的方法失效了,失效了… 折腾了好久,也请教了别人,终于,终于解决了这个需求。

环境:
   Centos 7.2.1511
   Python 2.7.5 -> 3.5

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
[root@localhost ~]# python -V
Python 2.7.5
[root@localhost ~]#
#安装easy_install命令
[root@localhost ~]# wget https://bootstrap.pypa.io/ez_setup.py -O - | python
#安装pip命令(python包管理软件)
[root@localhost ~]# wget https://pypi.python.org/packages/11/b6/abcb525026a4be042b486df43905d6893fb04f05aac21c32c638e939e447/pip-9.0.1.tar.gz#md5=35f01da33009719497f01a4ba69d63c9
[root@localhost ~]# tar -zxf pip-9.0.1.tar.gz
[root@localhost ~]# cd pip-9.0.1
[root@localhost pip-9.0.1]# python setup.py install
#安装python module
[root@localhost ~]# python select3.py
Traceback (most recent call last):
File "select3.py", line 3, in <module>
from sqlalchemy import create_engine
ImportError: No module named sqlalchemy
[root@localhost ~]#
#安装sqlalchemy模块
[root@localhost ~]# pip install sqlalchemy
Collecting pymysql
Downloading PyMySQL-0.7.11-py2.py3-none-any.whl (78kB)
100% |████████████████████████████████| 81kB 48kB/s
Installing collected packages: pymysql
Successfully installed pymysql-0.7.11
[root@localhost ~]#
#重复执行python select3.py 安装缺失的模块
[root@localhost ~]# pip install pymysql
[root@localhost ~]# pip install pandas
# Python 2.7不支持中文,报错如下,需要升级到 Python ,测试 Python 3.5.4 支持
[root@localhost ~]# python select3.py
Traceback (most recent call last):
File "select3.py", line 10, in <module>
sql.encode('gb18030')
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe6 in position 20: ordinal not in range(128)
[root@localhost ~]#

升级 Python 2.7.5 至 Python 3.5
不要动系统自带的Python,否则系统的某些功能都无法正常使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
[root@localhost ~]# wget https://www.python.org/ftp/python/3.5.4/Python-3.5.4.tgz
[root@localhost ~]# yum install openssl-devel -y
[root@localhost ~]# tar -zxf Python-3.5.4.tgz
[root@localhost ~]# mkdir -p /usr/local/Python-3.5.4
[root@localhost ~]# cd Python-3.5.4
[root@localhost Python-3.5.4]# ./configure --prefix=/usr/local/Python-3.5.4/
[root@localhost Python-3.5.4]# make && make install
[root@localhost ~]# /usr/local/Python-3.5.4/bin/python3.5 -V
Python 3.5.4
[root@localhost ~]#
# 现在系统同时存在两个版本的 Python,因此脚本需要的模块需要重新安装,因为Python 3.5.2之后自带 pip3 和 setuptools,因此需要使用 pip3 安装所需要的模块
[root@localhost ~]# /usr/local/Python-3.5.4/bin/python3.5 select3.py
Traceback (most recent call last):
File "select3.py", line 3, in <module>
from sqlalchemy import create_engine
ImportError: No module named 'sqlalchemy'
[root@localhost ~]#
#安装所需要的 Python 模块
[root@localhost ~]# /usr/local/Python-3.5.4/bin/pip3.5 install sqlalchemy
[root@localhost ~]# /usr/local/Python-3.5.4/bin/pip3.5 install pymysql
[root@localhost ~]# /usr/local/Python-3.5.4/bin/pip3.5 install pandas #这个很慢
[root@localhost ~]# /usr/local/Python-3.5.4/bin/pip3.5 install openpyxl
[root@localhost ~]# cat /etc/profile.d/Python.sh
#!/bin/sh
Python3_5_home='/usr/local/Python-3.5.4'
export PATH=${Python3_5_home}/bin:$PATH
[root@localhost ~]# source /etc/profile.d/Python.sh
[root@localhost ~]# /usr/local/Python-3.5.4/bin/python3.5 select3.py
[root@localhost ~]# ll | grep output.xlsx
-rw-r--r-- 1 root root 4854 Oct 15 06:37 output.xlsx
[root@localhost ~]#

Py_mysql_to_excel

如果报这个错,好像是 查询SQL 时超时了,在同级目录下会生成 __pycache__文件夹,脚本换个目录(不要在含有__pycache__这个目录的目录中执行 py脚本),然后删掉其父目录,再执行 py 脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
Traceback (most recent call last):
File "/usr/local/Python-3.5.4/lib/python3.5/site-packages/pandas/compat/__init__.py", line 47, in <module>
import __builtin__ as builtins
ImportError: No module named '__builtin__'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "select.py
", line 4, in <module>
import pymysql
File "/usr/local/Python-3.5.4/lib/python3.5/site-packages/pymysql/__init__.py", line 92, in <module>
from . import connections as _orig_conn
File "/usr/local/Python-3.5.4/lib/python3.5/site-packages/pymysql/connections.py
", line 13, in <module>
import socket
File "/usr/local/Python-3.5.4/lib/python3.5/socket.py
", line 52, in <module>
import os, sys, io, selectors
File "/usr/local/Python-3.5.4/lib/python3.5/selectors.py
", line 11, in <module>
import select
File "/data/soros/job/report/jinrongban_data_back/test/select.py
", line 5, in <module>
import pandas as pd
File "/usr/local/Python-3.5.4/lib/python3.5/site-packages/pandas/__init__.py", line 23, in <module>
from pandas.compat.numpy import *
File "/usr/local/Python-3.5.4/lib/python3.5/site-packages/pandas/compat/__init__.py", line 60, in <module>
import http.client as httplib
File "/usr/local/Python-3.5.4/lib/python3.5/http/client.py
", line 739, in <module>
class HTTPConnection:
File "/usr/local/Python-3.5.4/lib/python3.5/http/client.py
", line 749, in HTTPConnection
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
AttributeError: module 'socket' has no attribute '_GLOBAL_DEFAULT_TIMEOUT'

Python脚本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
[root@localhost ~]# cat select3.py
#!/usr/bin/env python
#-*- coding: utf8 -*-
from sqlalchemy import create_engine
import pymysql
import pandas as pd
from pandas import DataFrame,Series
engine=create_engine('mysql+pymysql://username:password@192.168.1.1/database_name?charset=gbk,pool_timeout=3000')
sql="select count(*) as '总数' from database.table1;"
sql.encode('gb18030')
df=pd.read_sql(sql,engine)
writer = pd.ExcelWriter('/root/output.xlsx')
df.to_excel(writer,'Sheet1',index=False)
writer.save()
[root@localhost ~]#

附件:
Python-3.5.4.tar.gz
select3.py
numpy-1.13.3-cp35-cp35m-manylinux1_x86_64.whl


本文出自”Jack Wang Blog”:http://www.yfshare.vip/2017/10/16/Python-读取Mysql生成EXCEL(XLSX)/