dropna
fillna
df = df.dropna(subset=["old_column"]) -- this drops the rows where "old_column" is null (dropna returns a DataFrame, so its result cannot be assigned to a single new column).
drop("column_name", axis = "columns")
# using "column_name" as an index
pd.read_csv("csvfile.csv", index_col="column_name")
# Good for reducing memory usage when a column can be categorized
.astype("category")
read_csv( ..., parse_dates=["date column"])
DML
select
select distinct
select count(distinct)
sort
select count()
select rank/topN Analysis
Where clause
isnull/isin between duplicates
where
wildcard like
SQL Update
df["Column"].str.replace("old_values", "new_values")
This works like SQL GROUP BY or the BOBJ Break feature:
.set_index(keys = ["xxx","yyy"])
.sort_index()
.drop_duplicates()
No comments:
Post a Comment