Creating a DataFrame from objects in pandas
Creating a DataFrame from objects

This introduction to pandas is derived from Data School's pandas Q&A with my own notes and code.

Creating a pandas DataFrame from another object

In [1]:
import pandas as pd
In [35]:
# pd.DataFrame is the constructor: it builds a DataFrame from another object.

# Source data as a dictionary.
#   key:   column name
#   value: list of values for that column
ex_dict = {
    'id': [100, 101, 102],
    'color': ['red', 'blue', 'red']
}

# Column order for the resulting frame (a list of strings).
columns = ['id', 'color']

# Row labels, one per row of data.
index = ['a', 'b', 'c']

# Build the frame from the dictionary, with explicit column order and row labels.
df = pd.DataFrame(ex_dict, columns=columns, index=index)
df
Out[35]:
id color
a 100 red
b 101 blue
c 102 red
In [16]:
# Each inner list is one row of the frame: [id, color].
list_of_lists = [[100, 'red'], [101, 'blue'], [102, 'red']]

# The rows carry no names of their own, so column names are supplied separately.
columns = ['id', 'color']

# No index is passed, so the rows get the default integer labels 0, 1, 2.
pd.DataFrame(data=list_of_lists, columns=columns)
Out[16]:
id color
0 100 red
1 101 blue
2 102 red
In [18]:
import numpy as np
In [20]:
# Draw a 4-row by 2-column NumPy array of random floats in [0, 1).
# The result is a 2-D ndarray (it prints like nested lists, but is not a Python list).
arr = np.random.random_sample((4, 2))
arr
Out[20]:
array([[ 0.66867334,  0.0496808 ],
       [ 0.24225703,  0.17014163],
       [ 0.37133698,  0.3160525 ],
       [ 0.76333377,  0.54704594]])
In [23]:
# Names for the two columns of the array.
columns_new = ['one', 'two']

# A 2-D array can be passed straight to the constructor:
# each array row becomes a DataFrame row.
pd.DataFrame(data=arr, columns=columns_new)
Out[23]:
one two
0 0.668673 0.049681
1 0.242257 0.170142
2 0.371337 0.316052
3 0.763334 0.547046
In [25]:
# Build each column from a NumPy array.
#
# np.arange(start, stop): consecutive integers in ascending order,
#   start included, stop excluded, default step of 1 -> 100..109.
#
# np.random.randint(low, high, size): random integers with
#   low included and high excluded -> 10 scores between 60 and 100.
#
# Dictionary keys become column names; each array supplies that column's values.
dict_new = {
    'student': np.arange(100, 110),
    'test': np.random.randint(low=60, high=101, size=10),
}

pd.DataFrame(data=dict_new)
Out[25]:
student test
0 100 66
1 101 68
2 102 74
3 103 73
4 104 92
5 105 100
6 106 65
7 107 73
8 108 100
9 109 82
In [27]:
# Use the 'student' column as the row index instead of the default 0..n-1 labels.
# The result is displayed only; nothing is assigned back to a variable.
pd.DataFrame(data=dict_new).set_index(keys='student')
Out[27]:
test
student
100 66
101 68
102 74
103 73
104 92
105 100
106 65
107 73
108 100
109 82
In [33]:
# A Series is a single labelled column: values, an index, and an optional name.
series_name = 'shape'
ind = ['c', 'b']
lst = ['round', 'square']

# The name becomes the column header if the Series is later joined to a DataFrame.
s = pd.Series(data=lst, index=ind, name=series_name)
s
Out[33]:
c     round
b    square
Name: shape, dtype: object
In [36]:
# Show the DataFrame built from the dictionary earlier in the notebook.
df
Out[36]:
id color
a 100 red
b 101 blue
c 102 red
In [38]:
# Attach the Series to the DataFrame as a new column.
# Rows are matched by index label, not by position; a label missing from
# the Series ('a' here) is filled with NaN.
df2 = pd.concat([df, s], axis='columns')
df2
Out[38]:
id color shape
a 100 red NaN
b 101 blue square
c 102 red round
In [45]:
# Fill in the missing 'shape' for row 'a' with a single label-based
# (row label, column label) scalar assignment.
df2.at['a', 'shape'] = 'rectangle'
In [44]:
# Show df2 again to confirm that row 'a' now has a 'shape' value.
df2
Out[44]:
id color shape
a 100 red rectangle
b 101 blue square
c 102 red round
Tags: pandas