I/O API Tools
-- CSV and textual Files
read_csv
read_table
to_csv
-- Reading Data in CSV or Text Files
csvframe = read_csv(ímyCSV_01.csví)
csvframe
read_table(ích05_01.csví,sep=í,í)
read_csv(ích05_02.csví)
read_csv(ích05_02.csví, header=None)
read_csv(ích05_02.csví, names=[íwhiteí,íredí,íblueí,ígreení,íanimalí])
read_csv(ích05_03.csví, index_col=[ícolorí,ístatusí])
-- Using RegExp for Parsing TXT Files
read_table(ích05_04.txtí,sep=í\s*í)
read_table(ích05_05.txtí,sep=í\D*í,header=None)
read_table(ích05_06.txtí,sep=í,í,skiprows=[0,1,3,6])
-- Reading TXT Files into Parts or Partially
read_csv(ích05_02.csví,skiprows=[2],nrows=3,header=None)
out = Series()
i=0
pieces = read_csv(ích05_01.csví,chunksize=3)
for piece in pieces:
out.set_value(i,piece[íwhiteí].sum())
i = i + 1
-- Writing Data in CSV
frame2
frame2.to_csv('ch05_07.csv')
frame2.to_csv(ích05_07b.csví, index=False, header=False)
frame3
frame3.to_csv(ích05_09.csví, na_rep =íNaNí)
-- Reading and Writing HTML Files
read_html
to_html
-- Writing Data in HTML
frame = pd.DataFrame(np.arange(4).reshape(2,2))
print(frame.to_html())
frame = pd.DataFrame( np.random.random((4,4)),
index = [íwhiteí,íblackí,íredí,íblueí],
columns = [íupí,ídowní,írightí,íleftí])
s = [í<HTML>í]
s.append(í<HEAD><TITLE>My DataFrame</TITLE></HEAD>í)
s.append(í<BODY>í)
s.append(frame.to_html())
s.append(í</BODY></HTML>í)
html = íí.join(s)
html_file = open(ímyFrame.htmlí,íwí)
html_file.write(html)
html_file.close()
-- Reading Data from an HTML File
web_frames = pd.read_html(ímyFrame.htmlí)
web_frames[0]
ranking = pd.read_html(íhttp://www.meccanismocomplesso.org/en/meccanismo-complesso-sito-2/classifica-punteggio/í)
ranking[0]
-- Reading Data from XML
from lxml import objectify
xml = objectify.parse(íbooks.xmlí)
xml
root = xml.getroot()
root.Book.Author
root.Book.PublishDate
root.getchildren()
[child.tag for child in root.Book.getchildren()]
[child.text for child in root.Book.getchildren()]
def etree2df(root):
column_names = []
for i in range(0,len(root.getchildren()[0].getchildren())):
column_names.append(root.getchildren()[0].getchildren()[i].tag)
xml:frame = pd.DataFrame(columns=column_names)
for j in range(0, len(root.getchildren())):
obj = root.getchildren()[j].getchildren()
texts = []
for k in range(0, len(column_names)):
texts.append(obj[k].text)
row = dict(zip(column_names, texts))
row_s = pd.Series(row)
row_s.name = j
xml:frame = xml:frame.append(row_s)
return xml:frame
etree2df(root)
-- Reading and Writing Data on Microsoft Excel Files
to_excel()
read_excel()
pd.read_excel(ídata.xlsí)
pd.read_excel(ídata.xlsí,íSheet2í)
pd.read_excel(ídata.xlsí,1)
frame = pd.DataFrame(np.random.random((4,4)),
index = [íexp1í,íexp2í,íexp3í,íexp4í],
columns = [íJan2015í,íFab2015í,íMar2015í,íApr2005í])
frame
frame.to_excel('data2.xlsx')
-- JSON Data
read_json
to_json
frame = pd.DataFrame(np.arange(16).reshape(4,4),
index=[íwhiteí,íblackí,íredí,íblueí],
columns=[íupí,ídowní,írightí,íleftí])
frame.to_json(íframe.jsoní)
pd.read_json('frame.json')
from pandas.io.json import json_normalize
file = open(íbooks.jsoní,írí)
text = file.read()
text = json.loads(text)
json_normalize(text,íbooksí)
json_normalize(text2,íbooksí,[íwriterí,ínationalityí])
-- The Format HDF5
from pandas.io.pytables import HDFStore
frame = pd.DataFrame(np.arange(16).reshape(4,4),
index=[íwhiteí,íblackí,íredí,íblueí],
columns=[íupí,ídowní,írightí,íleftí])
store = HDFStore(ímydata.h5í)
store[íobj1í] = frame
frame2
store[íobj2í] = frame2
store
store['obj2']
-- PickleóPython Object Serialization
-- Serialize a Python Object with cPickle
import cPickle as pickle
data = { ícolorí: [íwhiteí,íredí], ívalueí: [5, 7]}
pickled_data = pickle.dumps(data)
print pickled_data
nframe = pickle.loads(pickled_data)
nframe
-- Pickling with pandas
frame = pd.DataFrame(np.arange(16).reshape(4,4), index = [íupí,ídowní,íleftí,írightí])
frame.to_pickle(íframe.pklí)
pd.read_pickle(íframe.pklí)
-- Interacting with Databases
from sqlalchemy import create_engine
engine = create_engine(ípostgresql://scott:tiger@localhost:5432/mydatabaseí)
engine = create_engine(ímysql+mysqldb://scott:tiger@localhost/fooí)
engine = create_engine(íoracle://scott:tiger@127.0.0.1:1521/sidnameí)
engine = create_engine(ímssql+pyodbc://mydsní)
engine = create_engine(ísqlite:///foo.dbí)
-- Loading and Writing Data with SQLite3
frame = pd.DataFrame( np.arange(20).reshape(4,5), columns=[íwhiteí,íredí,íblueí,íblackí,ígreení])
frame
engine = create_engine(ísqlite:///foo.dbí)
frame.to_sql(ícolorsí,engine)
pd.read_sql(ícolorsí,engine)
import sqlite3
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
c REAL, d INTEGER
);"""
con = sqlite3.connect(í:memory:í)
con.execute(query)
con.commit()
data = [(íwhiteí,íupí,1,3),
(íblackí,ídowní,2,8),
(ígreení,íupí,4,4),
(íredí,ídowní,5,5)]
stmt = "INSERT INTO test VALUES(?,?,?,?)"
con.executemany(stmt, data)
con.commit()
cursor = con.execute(íselect * from testí)
cursor
rows = cursor.fetchall()
rows
cursor.description
pd.DataFrame(rows, columns=zip(*cursor.description)[0])
-- Loading and Writing Data with PostgreSQL
pd.__version__
engine = create_engine(ípostgresql://postgres:password@localhost:5432/postgresí)
frame = pd.DataFrame(np.random.random((4,4)),
index=[íexp1í,íexp2í,íexp3í,íexp4í],
columns=[ífebí,ímarí,íaprí,ímayí]);
frame.to_sql(ídataframeí,engine)
psql -U postgres
pd.read_sql_table(ídataframeí,engine)
pd.read_sql_query(íSELECT index,apr,may FROM DATAFRAME WHERE apr > 0.5í,engine)
-- Reading and Writing Data with a NoSQL Database: MongoDB
mongod --dbpath C:\MongoDB_data
import pymongo
client = MongoClient(ílocalhostí,27017)
db = client.mydatabase
db
collection = db.mycollection
db[ímycollectioní]
collection
frame = pd.DataFrame( np.arange(20).reshape(4,5),
columns=[íwhiteí,íredí,íblueí,íblackí,ígreení])
frame
import json
record = json.loads(frame.T.to_json()).values()
record
collection.mydocument.insert(record)
cursor = collection[ímydocumentí].find()
dataframe = (list(cursor))
del dataframe[í_idí]
dataframe
Conclusions