In [3]:
import pandas as pd
import numpy as np
Table0 = pd.read_csv("W0.csv")
Table1 = pd.read_csv("W1.csv")
Table1_color = Table1.style.format(None).highlight_null("orange")
Table2 = pd.read_csv("W2.csv")
Table2_color = Table2.style.format(None).highlight_null("lime")
In [186]:
Table0
Out[186]:
In [187]:
Table1_color
Out[187]:
In [188]:
Table2_color
Out[188]:
In [161]:
Table0.info()
In [162]:
Table1.info()
In [163]:
Table2.info()
Note:¶
Merging 2 tables with different data type can be tricky and buggy. In this situation, we have Table1["customerID"] as int64, Table2["customerID"] as float64. Both of them can be merged but only after I inserted " ' (Nan) into Table2. Otherwise, the data type is an object type. A better procedure will be to just convert the NaN into some bogus numbers such as 0 or 99999999 prior to joining with fillna(0)
When a table contain null (aka NaN), it turns the column from int64 to float64. So, another workaround is to add another NaN row into it.
In [164]:
Table3 = Table2["customerID"].astype(np.int64)
In [167]:
Table0.merge(Table1)
Out[167]:
In [168]:
Table1.merge(Table2)
Out[168]:
In [169]:
Table2.merge(Table1, how = 'inner')
Out[169]:
In [170]:
Table1.merge(Table2, how='inner', on = ["customerID"], suffixes=('_Table1', '_Table2'))
Out[170]:
In [171]:
Table1.merge(Table2, how='left', on = ["customerID"])
Out[171]:
In [172]:
Table1.merge(Table2, on = ["customerID"])
Out[172]:
In [173]:
Table1.merge(Table2, how='right', on = ["customerID"])
Out[173]:
In [174]:
Table1.merge(Table2, on = ["customerID"])
Out[174]:
merge does not work exactly the same as join¶
In [175]:
Table1.join(Table2, how='left', on = ["customerID"])
In [176]:
Table0.merge(Table1).merge(Table2)
Out[176]:
Multi tables joins¶
In [177]:
Table0.merge(Table1, on = "customerID").merge(Table2, on = 'customerID')
Out[177]:
multi-columns join¶
In [240]:
Table1.merge(Table2, how = 'inner', on = ["customerID", "OrderID"])
Out[240]:
In [243]:
Table1
Out[243]:
In [244]:
Table2
Out[244]:
In [245]:
Table1.merge(Table2, how = "outer", on = "customerID")
Out[245]:
In [246]:
Table1.dtypes
Out[246]:
In [247]:
Table1.info()
In [254]:
import pandas as pd
contact = pd.read_csv("contact.csv")
contact
Out[254]:
Note : Reason for this error because there is no common column name in both tables to be joined.¶
MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False
In [257]:
Table1.merge(contact)
In [262]:
Table1.merge(contact, left_on ="customerID", right_on="id", sort = True).drop("id", axis="columns")
Out[262]:
Similar to calling merge under dataframe level. This is a level above and at the pandas level.¶
In [4]:
pd.merge(Table1, Table2)
Out[4]:
In [ ]: