The pandas DataFrame object
A pandas Series represents a single array of values, with an index label for each value. If you want to have more than one series of data aligned by a common index, then a pandas DataFrame is used.
import pandas as pd
from pandas import DataFrame, Series
# Daily DatetimeIndex covering 2019-05-18 through 2019-05-25 (both inclusive).
dates = pd.date_range(start='2019-05-18', end='2019-05-25')

# One temperature reading per date for each city, labelled by the same index.
temp_chennai = Series(data=[36, 37, 36, 37, 37, 37, 37, 37], index=dates)
temp_delhi = Series(data=[34, 39, 41, 41, 41, 41, 41, 42], index=dates)

# Combine the two Series into a single DataFrame; each dict key becomes the
# name of the column holding the corresponding Series.
temps_df = DataFrame({"chennai": temp_chennai, "Delhi": temp_delhi})
temps_df
|
chennai |
Delhi |
2019-05-18 |
36 |
34 |
2019-05-19 |
37 |
39 |
2019-05-20 |
36 |
41 |
2019-05-21 |
37 |
41 |
2019-05-22 |
37 |
41 |
2019-05-23 |
37 |
41 |
2019-05-24 |
37 |
41 |
2019-05-25 |
37 |
42 |
temps_df['chennai'] # select the column named 'chennai' (returned as a Series)
2019-05-18 36
2019-05-19 37
2019-05-20 36
2019-05-21 37
2019-05-22 37
2019-05-23 37
2019-05-24 37
2019-05-25 37
Freq: D, Name: chennai, dtype: int64
temps_df['Delhi'] # select the column named 'Delhi' (returned as a Series)
2019-05-18 34
2019-05-19 39
2019-05-20 41
2019-05-21 41
2019-05-22 41
2019-05-23 41
2019-05-24 41
2019-05-25 42
Freq: D, Name: Delhi, dtype: int64
temps_df.chennai # retrieve the chennai column through attribute (property) syntax
2019-05-18 36
2019-05-19 37
2019-05-20 36
2019-05-21 37
2019-05-22 37
2019-05-23 37
2019-05-24 37
2019-05-25 37
Freq: D, Name: chennai, dtype: int64
# Absolute day-by-day gap between the two cities' temperatures.
temp_diffs = (temps_df['chennai'] - temps_df['Delhi']).abs()
# Store the gap as a new column named 'Difference', then display the frame.
temps_df['Difference'] = temp_diffs
temps_df
|
chennai |
Delhi |
Difference |
2019-05-18 |
36 |
34 |
2 |
2019-05-19 |
37 |
39 |
2 |
2019-05-20 |
36 |
41 |
5 |
2019-05-21 |
37 |
41 |
4 |
2019-05-22 |
37 |
41 |
4 |
2019-05-23 |
37 |
41 |
4 |
2019-05-24 |
37 |
41 |
4 |
2019-05-25 |
37 |
42 |
5 |
temps_df.columns # get the column labels of the DataFrame
Index(['chennai', 'Delhi', 'Difference'], dtype='object')
2019-05-19 2
2019-05-20 5
2019-05-21 4
Freq: D, Name: Difference, dtype: int64
temps_df.iloc[0] # get the row at array position 0
chennai 36
Delhi 34
Difference 2
Name: 2019-05-18 00:00:00, dtype: int64
/home/mmblack/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
"""Entry point for launching an IPython kernel.
Index(['chennai', 'Delhi', 'Difference'], dtype='object')
# get the row whose index label is 2019-05-19 (label-based lookup)
temps_df.loc['2019-05-19']
chennai 37
Delhi 39
Difference 2
Name: 2019-05-19 00:00:00, dtype: int64
temps_df.iloc[[1, 3, 5]] # select only the rows at positions 1, 3 and 5
|
chennai |
Delhi |
Difference |
2019-05-19 |
37 |
39 |
2 |
2019-05-21 |
37 |
41 |
4 |
2019-05-23 |
37 |
41 |
4 |
temps_df.Delhi >40 # which values in the Delhi column are greater than 40
2019-05-18 False
2019-05-19 False
2019-05-20 True
2019-05-21 True
2019-05-22 True
2019-05-23 True
2019-05-24 True
2019-05-25 True
Freq: D, Name: Delhi, dtype: bool
temps_df[temps_df.Delhi > 40] # return the rows where the Delhi temperature is greater than 40
|
chennai |
Delhi |
Difference |
2019-05-20 |
36 |
41 |
5 |
2019-05-21 |
37 |
41 |
4 |
2019-05-22 |
37 |
41 |
4 |
2019-05-23 |
37 |
41 |
4 |
2019-05-24 |
37 |
41 |
4 |
2019-05-25 |
37 |
42 |
5 |