import pandas as pd
import pygwalker as pyg
# Load the Netflix catalogue; the path is relative to the working directory.
df = pd.read_csv("./netflix_titles.csv")

# Parse the "date_added" text into datetimes. Whitespace is stripped first:
# pandas >= 2.0 infers a single format from the first value and raises on
# any value that does not match it, which includes dates padded with a
# leading space. NOTE(review): some copies of this dataset contain such
# padded dates -- confirm for this file; stripping is harmless otherwise.
df["date_added"] = pd.to_datetime(df["date_added"].str.strip())

# Split the date into integer year / month columns. Rows without a date
# (NaT) are filled with 0 so the columns can be cast to int.
df["date_added_year"] = df["date_added"].dt.year.fillna(0).astype(int)
df["date_added_month"] = df["date_added"].dt.month.fillna(0).astype(int)

# Keep only the leading number of "duration" (the text before the first
# space) as an integer. This raises if any duration is missing.
df["duration"] = df["duration"].str.split(" ").str[0].astype(int)

# Keep titles added in 2019 or earlier.
# NOTE(review): rows with no "date_added" were given year 0 above, so they
# also pass this filter -- confirm that is intended.
df = df[df["date_added_year"] <= 2019]
df
show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | date_added_year | date_added_month | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | s2 | Movie | 7:19 | Jorge Michel Grau | Demián Bichir, Héctor Bonilla, Oscar Serrano, ... | Mexico | 2016-12-23 | 2016 | TV-MA | 93 | Dramas, International Movies | After a devastating earthquake hits Mexico Cit... | 2016 | 12 |
2 | s3 | Movie | 23:59 | Gilbert Chan | Tedd Chan, Stella Chung, Henley Hii, Lawrence ... | Singapore | 2018-12-20 | 2011 | R | 78 | Horror Movies, International Movies | When an army recruit is found dead, his fellow... | 2018 | 12 |
3 | s4 | Movie | 9 | Shane Acker | Elijah Wood, John C. Reilly, Jennifer Connelly... | United States | 2017-11-16 | 2009 | PG-13 | 80 | Action & Adventure, Independent Movies, Sci-Fi... | In a postapocalyptic world, rag-doll robots hi... | 2017 | 11 |
5 | s6 | TV Show | 46 | Serdar Akar | Erdal Beşikçioğlu, Yasemin Allen, Melis Birkan... | Turkey | 2017-07-01 | 2016 | TV-MA | 1 | International TV Shows, TV Dramas, TV Mysteries | A genetics professor experiments with a treatm... | 2017 | 7 |
7 | s8 | Movie | 187 | Kevin Reynolds | Samuel L. Jackson, John Heard, Kelly Rowan, Cl... | United States | 2019-11-01 | 1997 | R | 119 | Dramas | After one of his high school students attacks ... | 2019 | 11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7777 | s7778 | TV Show | Zombie Dumb | NaN | NaN | NaN | 2019-07-01 | 2018 | TV-Y7 | 2 | Kids' TV, Korean TV Shows, TV Comedies | While living alone in a spooky town, a young g... | 2019 | 7 |
7778 | s7779 | Movie | Zombieland | Ruben Fleischer | Jesse Eisenberg, Woody Harrelson, Emma Stone, ... | United States | 2019-11-01 | 2009 | R | 88 | Comedies, Horror Movies | Looking to survive in a world taken over by zo... | 2019 | 11 |
7779 | s7780 | TV Show | Zona Rosa | NaN | Manu NNa, Ana Julia Yeyé, Ray Contreras, Pablo... | Mexico | 2019-11-26 | 2019 | TV-MA | 1 | International TV Shows, Spanish-Language TV Sh... | An assortment of talent takes the stage for a ... | 2019 | 11 |
7780 | s7781 | Movie | Zoo | Shlok Sharma | Shashank Arora, Shweta Tripathi, Rahul Kumar, ... | India | 2018-07-01 | 2018 | TV-MA | 94 | Dramas, Independent Movies, International Movies | A drug dealer starts having doubts about his t... | 2018 | 7 |
7783 | s7784 | Movie | Zubaan | Mozez Singh | Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan... | India | 2019-03-02 | 2015 | TV-14 | 111 | Dramas, International Movies, Music & Musicals | A scrappy but poor boy worms his way into a ty... | 2019 | 3 |
5661 rows × 14 columns
# Open a pygwalker session over the cleaned catalogue, passing "0.json" as
# the chart spec, with preview and chart-data storage switched on.
walker0 = pyg.walk(
    df,
    spec="0.json",
    use_preview=True,
    store_chart_data=True,
)

# Show charts 1-4 by name, each with its presentation title.
walker0.display_chart(
    "Chart 1",
    title="Content Type On Netflix",
)
walker0.display_chart(
    "Chart 2",
    title="Content Added Over Year",
    desc="The number of movies on Netflix is growing much faster than TV shows, movie content has grown substantially after 2016.",
)
walker0.display_chart(
    "Chart 3",
    title="Content Release Over Year",
)
walker0.display_chart(
    "Chart 4",
    title="Content Added Over Month",
    desc="",
)
The number of movies on Netflix is growing much faster than TV shows, movie content has grown substantially after 2016.
# Chart 5: titles added per year, broken down by rating.
walker0.display_chart(
    "Chart 5",
    title="Content Added Over Year Diff By Rating",
    desc="TV-MA, TV-14 are the ratings for most of Netflix's content, and R content is also increasing year by year",
)
TV-MA, TV-14 are the ratings for most of Netflix's content, and R content is also increasing year by year
# Chart 6: distribution of movie running times.
walker0.display_chart(
    "Chart 6",
    title="movie time distribution",
    desc="Mainly concentrated between 90 and 110 minutes",
)
Mainly concentrated between 90 and 110 minutes
# Chart 7: distribution of season counts for TV shows.
walker0.display_chart(
    "Chart 7",
    title="tv-show season distribution",
)
# Build a one-column frame with one row per (title, country) pair:
# split the comma-separated "country" text into columns, stack them into a
# single Series (missing values are dropped by stack), discard the inner
# position level so only the original row label remains, and trim the
# whitespace left around each name by the split.
country_df = (
    df["country"]
    .str.split(",", expand=True)
    .stack()
    .reset_index(level=1, drop=True)
    .str.strip()
    .to_frame("country")
)

# Explore the per-country rows in pygwalker, passing "1.json" as the spec.
walker1 = pyg.walk(
    country_df,
    spec="1.json",
    use_preview=True,
    store_chart_data=True,
)
walker1.display_chart(
    "Chart 1",
    title="Countries Of Most Content",
)
# Build one row per (title, category) pair, keeping rating and type.
# An explicit copy is taken so that adding a column below does not write
# into a view of the already-filtered `df`.
category_df = df[["listed_in", "rating", "type"]].copy()

# "listed_in" is a comma-separated list of categories; split it into a
# Python list per row, then explode so each category gets its own row.
category_df["category"] = category_df["listed_in"].str.split(",")
category_df = category_df[["category", "rating", "type"]]
category_df = category_df.explode("category").reset_index(drop=True)

# Splitting on "," leaves the space that follows each comma attached to the
# next item, so " Dramas" and "Dramas" would be counted as two different
# categories. Trim the names, as is already done for countries.
category_df["category"] = category_df["category"].str.strip()
# Explore the per-category rows in pygwalker, passing "2.json" as the spec,
# with preview and chart-data storage switched on.
walker2 = pyg.walk(category_df, spec="2.json", use_preview=True, store_chart_data=True)

# Category distribution charts, one for TV shows and one for movies.
walker2.display_chart("TV category", title="tv-show category distribution")
walker2.display_chart("Movie category", title="movie category distribution")

# Rating-by-category heatmaps. The displayed titles previously misspelled
# "heatmap" as "heatamp"; only the title text changes, the chart names
# passed as the first argument are untouched.
walker2.display_chart("rating category(tv)", title="rating category heatmap(TV-Show)")
walker2.display_chart("rating category(movie)", title="rating category heatmap(movie)")