Friday, June 5, 2020

pandas: .str method

pandas_str
In [92]:
import pandas as pd
df = pd.read_csv("W1.csv")
df
Out[92]:
customerID OrderID Product Company_Name
0 1234.0 9991.0 laptop Dell
1 2345.0 9992.0 desktop HP
2 3456.0 9993.0 monitor LG
3 NaN 8888.0 trackpad Apple
4 4567.0 9994.0 keyboard Anker
5 123.0 NaN NaN Apple
6 1234.0 9990.0 tablet Apple
In [93]:
df["Product"].str.upper()
Out[93]:
0       LAPTOP
1      DESKTOP
2      MONITOR
3     TRACKPAD
4     KEYBOARD
5          NaN
6       TABLET
Name: Product, dtype: object
In [94]:
df["Company_Name"].str.upper()
Out[94]:
0      DELL
1        HP
2        LG
3     APPLE
4     ANKER
5     APPLE
6     APPLE
Name: Company_Name, dtype: object
In [95]:
df["Product"].str.replace("tablet", "cellphone")
Out[95]:
0        laptop
1       desktop
2       monitor
3      trackpad
4      keyboard
5           NaN
6     cellphone
Name: Product, dtype: object
In [96]:
df["Product"] = df["Product"].str.title()
In [97]:
df
Out[97]:
customerID OrderID Product Company_Name
0 1234.0 9991.0 Laptop Dell
1 2345.0 9992.0 Desktop HP
2 3456.0 9993.0 Monitor LG
3 NaN 8888.0 Trackpad Apple
4 4567.0 9994.0 Keyboard Anker
5 123.0 NaN NaN Apple
6 1234.0 9990.0 Tablet Apple
In [98]:
search = df["Company_Name"].dropna(how = "All").str.lower().str.contains("apple")
df[search]
Out[98]:
customerID OrderID Product Company_Name
3 NaN 8888.0 Trackpad Apple
5 123.0 NaN NaN Apple
6 1234.0 9990.0 Tablet Apple
In [143]:
import pandas as pd
df = pd.read_csv("W1.csv")

#search = df["Product"].str.lower().str.startswith("ta", na=False)
#search

search = df["Company_Name"].str.lower().str.strip().str.startswith("a")
df[search]
Out[143]:
customerID OrderID Product Company_Name
3 NaN 8888.0 trackpad Apple
4 4567.0 9994.0 keyboard Anker
5 123.0 NaN NaN Apple
6 1234.0 9990.0 tablet Apple
In [120]:
search = df["Company_Name"].str.lower().str.endswith("apple")
df[search]
Out[120]:
customerID OrderID Product Company_Name
3 NaN 8888.0 trackpad Apple
5 123.0 NaN NaN Apple
6 1234.0 9990.0 tablet Apple
In [148]:
import pandas as pd
df = pd.read_csv("W1.csv")
df["Product"] = df["Product"].dropna(how="all")
df["Product"]
Out[148]:
0       laptop
1      desktop
2      monitor
3     trackpad
4     keyboard
5          NaN
6       tablet
Name: Product, dtype: object
In [149]:
df["Product"].replace("trackpad", "notracpad")
Out[149]:
0       laptop
1      desktop
2      monitor
3    notracpad
4     keyboard
5          NaN
6       tablet
Name: Product, dtype: object
In [159]:
import pandas as pd
df = pd.read_csv("W1.csv", index_col="Company_Name").dropna(how="all")
df["Product"] = df["Product"].astype("category")
df
df.index.str.strip().str.title()
df.index = df.index.str.strip().str.title()
df
Out[159]:
customerID OrderID Product
Company_Name
Dell 1234.0 9991.0 laptop
Hp 2345.0 9992.0 desktop
Lg 3456.0 9993.0 monitor
Apple NaN 8888.0 trackpad
Anker 4567.0 9994.0 keyboard
Apple 123.0 NaN NaN
Apple 1234.0 9990.0 tablet
In [161]:
df.columns.str.upper()
Out[161]:
Index(['CUSTOMERID', 'ORDERID', 'PRODUCT'], dtype='object')
In [163]:
import pandas as pd
df = pd.read_csv("W1.csv")
df
Out[163]:
customerID OrderID Product Company_Name
0 1234.0 9991.0 laptop Dell
1 2345.0 9992.0 desktop HP
2 3456.0 9993.0 monitor LG
3 NaN 8888.0 trackpad Apple
4 4567.0 9994.0 keyboard Anker
5 123.0 NaN NaN Apple
6 1234.0 9990.0 tablet Apple
In [165]:
df["new_column"] = df["Product"].str.title()
df
Out[165]:
customerID OrderID Product Company_Name new_column
0 1234.0 9991.0 laptop Dell Laptop
1 2345.0 9992.0 desktop HP Desktop
2 3456.0 9993.0 monitor LG Monitor
3 NaN 8888.0 trackpad Apple Trackpad
4 4567.0 9994.0 keyboard Anker Keyboard
5 123.0 NaN NaN Apple NaN
6 1234.0 9990.0 tablet Apple Tablet
In [169]:
df["customerID"].nlargest(5)
Out[169]:
4    4567.0
2    3456.0
1    2345.0
0    1234.0
6    1234.0
Name: customerID, dtype: float64
In [167]:
df["customerID"].nsmallest()
Out[167]:
5     123.0
0    1234.0
6    1234.0
1    2345.0
2    3456.0
Name: customerID, dtype: float64
In [ ]:
 

No comments:

Post a Comment

Pandas: SQL Like pandas operations

Pandas's SQL Like operations such as WHERE clause. = != >= str.contains() & | .isin() .isnull() .notnull() ....