Ну типо сделал

This commit is contained in:
gg12 darfren 2024-10-22 18:54:39 +04:00
parent 88f9aec41f
commit 3720eb2ec7
7 changed files with 20378 additions and 639 deletions

19238
data/car_price_prediction.csv Normal file

File diff suppressed because it is too large Load Diff

101
data/unicorns.csv Normal file
View File

@ -0,0 +1,101 @@
Company;Valuation;Country;State;City;Industries;FoundedYear;Name of Founders;TotalFunding;Number of Employees
Bytedance;140,00 US$;China;Beijing;Beijing;Content, Data Mining, Internet;2012;Yiming Zhang;$7,440.00M;10.000
SpaceX;100,30 US$;United States;California;Hawthorne;Aerospace, Manufacturing, Space Travel, Transportation;2002;Elon Musk;$383.02M;5,000-10,000
Stripe;95,00 US$;United States;California;San Francisco;Finance, FinTech, Mobile Payments, SaaS;2010;John Collison, Patrick Collison;$300.00M;1,000-5,000
Klarna;45,60 US$;Sweden;;Stockholm;E-Commerce, FinTech, Payments, Shopping;2005;Niklas Adalberth, Sebastian Siemiatkowski, Victor Jacobsson;$3,471.72M;5,000-10,000
Epic Games;42,00 US$;United States;North Carolina;Cary;Developer Platform, Gaming, Software, Video Games;1991;Mark Rein, Tim Sweeney;$544.93M;1,000-5,000
Canva;40,00 US$;Australia;New South Wales;Surry Hills;Graphic Design, Photo Editing, Publishing, Software, Web Design;2012;Cameron Adams, Cliff Obrecht, Melanie Perkins;$571.26M;500-1,000
Checkout.com;40,00 US$;United Kingdom;England;London;E-Commerce, FinTech, Payments, Transaction Processing;2012;Guillaume Pousaz;$1,830.00M;1,000-5,000
Instacart;39,00 US$;United States;California;San Francisco;Delivery Service, E-Commerce, Grocery, Shopping;2012;Apoorva Mehta, Brandon Leonardo, Max Mullen;$2,686.01M;5,000-10,000
Databricks;38,00 US$;United States;California;San Francisco;Analytics, Artificial Intelligence, Information Technology, Machine Learning, Software;2013;Ali Ghodsi, Andy Konwinski, Ion Stoica, Matei Zaharia, Patrick Wendell, Reynold Xin, Scott Shenker;$557.15M;1,000-5,000
Revolut;33,00 US$;United Kingdom;England;London;Banking, Financial Services, FinTech, Mobile Payments;2015;Nikolay Storonsky, Vlad Yatsenko;$1,715.98M;1,000-5,000
FTX;32,00 US$;Bahamas;;;Cryptocurrency, Finance, Financial Exchanges, Financial Services, Trading Platform;2018;Gary Wang, Sam Bankman-Fried;$1,828.69M;100-250
Chime;25,00 US$;United States;California;San Francisco;Banking, Debit Cards, Financial Services, FinTech;2013;Chris Britt, Ryan King;$3,396.75M;1,000-5,000
BYJU's;21,00 US$;India;;Bengaluru;E-Learning, EdTech, Education, Higher Education, Software;2008;Byju Raveendran, Divya Gokulnath;$5,182.78M;1,000-5,000
J&T Express;20,00 US$;Indonesia;;Jakarta;Courier Service, E-Commerce, Freight Service;2015;Jet Lee, Tony Chen;$4,653.00M;10.000
Xiaohongshu;20,00 US$;China;Shanghai;Shanghai;E-Commerce, Mobile Apps, Shopping, Social;2013;Charlwin Mao Wenchao, Miranda Qu;$917.50M;1,000-5,000
Fanatics;18,00 US$;United States;Florida;Jacksonville;Manufacturing, Retail, Sporting Goods, Sports;1995;Alan Trager, Michael G. Rubin, Mitch Trager;$1,170.29M;1,000-5,000
Miro;17,50 US$;United States;California;San Francisco;B2B, Enterprise Applications, Enterprise Software, Product Management, UX Design;2011;Andrey Khusid, Oleg Shardin, Oleg Shardin;$355.00M;1,000-5,000
Yuanfudao;15,50 US$;China;Beijing;Beijing;E-Learning, EdTech, Tutoring;2012;Ke Shuai, Xin Li, Yong Li;$4,044.20M;10.000
Ripple;15,00 US$;United States;California;San Francisco;Blockchain, Cryptocurrency, FinTech, Internet, Payments;2012;Arthur Britto, Chris Larsen, Jed McCaleb, Ryan Fugger;$293.90M;500-1,000
DJI Innovations;15,00 US$;China;Guangdong;Shenzhen;Aerospace, Consumer Electronics, Drones, Manufacturing, Photography, Wireless;2006;Frank Wang;$1,135.00M;10.000
goPuff;15,00 US$;United States;Pennsylvania;Philadelphia;Delivery Service, E-Commerce, Food Delivery, Grocery, Mobile Apps;2013;Rafael Ilishayev, Yakir Gola;$290.88M;5,000-10,000
SHEIN;15,00 US$;China;Guangdong;Shenzhen;Consumer, E-Commerce, Fashion, Marketplace, Textiles;2008;Xiaoqing Ren, Yang Pei, Yangtian Xu;$553.36M;1,000-5,000
Yuanqi Senlin;15,00 US$;China;Beijing;Beijing;Food and Beverage;2016;Binsen Tang;$721.31M;5,000-10,000
Plaid;13,40 US$;United States;California;San Francisco;Finance, Financial Services, FinTech;2012;William Hockey, Zachary Perret;$734.80M;500-1,000
OpenSea;13,30 US$;United States;New York;New York;Blockchain, Cryptocurrency, Marketplace;2017;Alex Atallah, Devin Finzer;$425.12M;100-250
Grammarly;13,00 US$;United States;California;San Francisco;Assistive Technology, Information Technology, Productivity Tools;2009;Alex Shevchenko, Dmytro Lider, Max Lytvyn;$400.00M;500-1,000
Devoted Health;12,60 US$;United States;Minnesota;Saint Paul;Elder Care, Elderly, Health Care, Hospital;2017;Ed Park, Jeremy Delinsky, Todd Park;$1,968.95M;1,000-5,000
Faire;12,40 US$;United States;California;San Francisco;E-Commerce, Marketplace, Retail, Retail Technology, Wholesale;2017;Daniele Perito, Jeffrey Kolovson, Lauren Cooks Levitan, Marcelo Cortes, Max Rhodes;$1,096.12M;500-1,000
Brex;12,30 US$;United States;Utah;Draper;Banking, Credit Cards, Financial Services, FinTech;2017;Henrique Dubugras, Pedro Franceschi;$1,490.12M;500-1,000
Biosplice Therapeutics;12,00 US$;United States;California;San Diego;Biotechnology, Health Care, Life Science;2008;Osman Kibar;$285.71M;50-100
Bitmain Technologies;12,00 US$;China;Beijing;Beijing;Application Specific Integrated Circuit (ASIC), Bitcoin, Electronics, Manufacturing, Semiconductor;2013;Jihan Wu, Micree Zhan;$450.00M;100-250
GoodLeap;12,00 US$;United States;California;Roseville;Lending, Renewable Energy;2003;Hayes Barnard;$1,800.00M;1,000-5,000
JUUL Labs;12,00 US$;United States;California;San Francisco;B2C, Consumer Electronics, Consumer Goods, Leisure, Lifestyle;2015;Adam Bowen, James Monsees, Kevin Burns, Tim Danaher;$15,371.68M;1,000-5,000
Airtable;11,70 US$;United States;California;San Francisco;Collaboration, Database, Developer Tools, SaaS;2013;Andrew Ofstad, Emmett Nicholas, Howie Liu;$2,236.60M;250-500
ZongMu Technology;11,40 US$;China;Shanghai;Shanghai;Automotive, Autonomous Vehicles, Robotics;2013;Rui Tang;$210.83M;10-50
Global Switch;11,10 US$;United Kingdom;England;London;Data Center, Real Estate, Wholesale;1998;Andy Ruhan;$6,254.75M;250-500
Bolt;11,00 US$;United States;California;San Francisco;E-Commerce, Fraud Detection, Mobile Payments, Payments;2014;Eric Feldman, Ryan Breslow;$963.00M;500-1,000
Celonis;11,00 US$;Germany;;Munich;Analytics, Business Intelligence, SaaS, Software;2011;Alexander Rinke, Bastian Nominacher, Martin Klenk;$1,367.50M;1,000-5,000
Weilong;10,88 US$;China;Henan;Luohe;Food and Beverage, Manufacturing, Snack Food;1999;Liu Fuping, Liu Weiping;$559.74M;No Data
Swiggy;10,70 US$;India;;Bengaluru;E-Commerce Platforms, Food Delivery, Mobile Apps;2014;Nandan Reddy, Phani Kishan Addepalli, Rahul Jaimini, Sriharsha Majety;$3,571.00M;10.000
Figma;10,00 US$;United States;California;San Francisco;Developer Tools, Graphic Design, Software, UX Design, Web Design;2012;Dylan Field, Evan Wallace;$333.50M;250-500
Talkdesk;10,00 US$;United States;California;San Francisco;Cloud Computing, CRM, Customer Service, SaaS;2011;Cristina Fonseca, Tiago Paiva;$504.77M;1,000-5,000
Digital Currency Group;10,00 US$;United States;New York;New York;Bitcoin, Blockchain, Financial Services, Venture Capital;2015;Barry Silbert;$600.00M;50-100
Gusto;10,00 US$;United States;California;San Francisco;Employee Benefits, Enterprise Software, Financial Services, FinTech, Human Resources, SaaS;2011;Edward Kim, Joshua Reeves, Tomer London;$930.83M;1,000-5,000
Lalamove;10,00 US$;Hong Kong;;Cheung Sha Wan;Apps, Delivery, Logistics, Supply Chain Management, Transportation;2013;Chow Shing Yuk, Gary Hui, Santit Jirawongkraisorn;$2,475.00M;500-1,000
Notion Labs;10,00 US$;United States;California;San Francisco;Apps, Collaboration, Product Management, Real Time, Software;2016;Ivan Zhao, Simon Last;$342.00M;250-500
reddit;10,00 US$;United States;California;San Francisco;Content, News, Social Bookmarking, Social Media, Social Network;2005;Aaron Swartz, Alexis Ohanian, Steve Huffman;$1,487.23M;500-1,000
Thrasio;10,00 US$;United States;Massachusetts;Walpole;Brand Marketing, Consumer Goods, E-Commerce;2018;Carlos Cashman, Joshua Silberstein;$3,396.46M;1,000-5,000
OYO Rooms;9,60 US$;India;;Gurugram;Hospitality, Travel, Travel Accommodations;2012;Ritesh Agarwal;$3,113.68M;5,000-10,000
OutSystems;9,50 US$;United States;Massachusetts;Boston;Data Integration, Developer Platform, Developer Tools, PaaS, SaaS, Software;2001;Paulo Rosado, Rui Pereira;$208.00M;1,000-5,000
ServiceTitan;9,50 US$;United States;California;Glendale;CRM, Home Services, Information Technology, SaaS;2012;Ara Mahdessian, Vahe Kuzoyan;$1,098.84M;1,000-5,000
HEYTEA;9,28 US$;China;Guangdong;Shenzhen;Food and Beverage, Tea;2012;Yunqi Nie;$579.23M;1,000-5,000
N26;9,23 US$;Germany;;Berlin;Banking, Finance, Financial Services, FinTech;2013;Maximilian Tayenthal, Valentin Stalf;$1,722.36M;1,000-5,000
Klaviyo;9,20 US$;United States;Massachusetts;Boston;Advertising, Analytics, E-Commerce, Marketing, Marketing Automation, Software;2012;Andrew Bialecki, Ed Hallen;$678.50M;500-1,000
Northvolt;9,08 US$;Sweden;;Stockholm;Battery, Clean Energy, CleanTech, Electronics, Manufacturing;2016;Paolo Cerruti, Peter Carlsson;$6,162.15M;1,000-5,000
Chehaoduo;9,00 US$;China;Beijing;Beijing;Automotive, E-Commerce, Online Auctions;2015;Mark Yang;$696.76M;10.000
Niantic;9,00 US$;United States;California;San Francisco;Augmented Reality, Software, Video Games, Virtual Reality;2015;John Hanke, Phil Keslin;$770.00M;500-1,000
Tanium;9,00 US$;United States;Washington;Kirkland;Cyber Security, Enterprise Software, Information Technology, SaaS, Security;2007;David Hindawi, Orion Hindawi;$4,376.50M;1,000-5,000
Rapyd;8,75 US$;United Kingdom;England;London;Financial Services, FinTech, Mobile Payments, Payments;2016;Arik Shtilman, Arkady Karpman, Omer Priel;$775.00M;250-500
Kavak;8,70 US$;Mexico;;Lerma de Villada;Automotive, E-Commerce, E-Commerce Platforms, Online Portals;2016;Carlos Julio Garcia, Roger Laughlin;$1,188.00M;1,000-5,000
Nuro;8,60 US$;United States;California;Mountain View;Autonomous Vehicles, Fleet Management, Information Technology, Robotics, Transportation;2016;Dave Ferguson, Jiajun Zhu;$2,132.00M;1,000-5,000
Snyk;8,60 US$;United States;Massachusetts;Boston;Cyber Security, Internet, Security, Software;2015;Assaf Hefetz, Danny Grander, Guy Podjarny, Jacob Tarango;$1,026.09M;500-1,000
Bolt;8,40 US$;Estonia;;Tallinn;Car Sharing, Electric Vehicle, Food Delivery, Grocery, Last Mile Transportation, Mobile Apps, Public Transportation, Ride Sharing, Transportation;2013;Markus Villig, Martin Villig, Oliver Leisalu;$1,970.53M;1,000-5,000
Tipalti;8,30 US$;United States;California;San Mateo;Accounting, Financial Services, FinTech, Payments, Software;2010;Chen Amit, Oren Zeev;$502.50M;500-1,000
Lacework;8,30 US$;United States;California;San Jose;Cloud Security, Compliance, Cyber Security, Developer Tools;2015;Mike Speiser, Sanjay Kalra, Vikram Kapoor;$1,906.70M;500-1,000
Tempus;8,10 US$;United States;Illinois;Chicago;Artificial Intelligence, Biotechnology, Health Care, Machine Learning, Medical;2015;Eric Lefkofsky;$1,070.00M;1,000-5,000
Fireblocks;8,00 US$;United States;New York;New York;Blockchain, Cryptocurrency, Cyber Security;2018;Idan Ofrat, Michael Shaulov, Pavel Berengoltz;$1,039.00M;100-250
Dream11;8,00 US$;India;;Mumbai;Fantasy Sports, Information Technology, Sports;2007;Bhavit Sheth, Harsh Jain;$1,165.08M;500-1,000
Xingsheng Selected;8,00 US$;China;Hunan;Changsha;E-Commerce, Food and Beverage, Grocery, Retail, Shopping;2009;Lihua Yue;$5,040.00M;10.000
Caris Life Sciences;7,83 US$;United States;Texas;Irving;Biotechnology, Health Care, Health Diagnostics, Medical;1996;David D. Halbert;$1,314.49M;1,000-5,000
Hopin;7,75 US$;United Kingdom;England;London;Events, Meeting Software, Video Conferencing;2019;Johnny Boufarhat;$1,021.73M;500-1,000
Dapper Labs;7,60 US$;Canada;British Columbia;Vancouver;Blockchain, Gaming, Software;2018;Dieter Shirley, Mack Flavelle, Roham Gharegozlou;$665.07M;100-250
Getir;7,50 US$;Turkey;;Istanbul;Delivery Service, E-Commerce, Logistics, Mobile Apps;2015;Arkady Volozh, Mert Salur, Nazım Salur, Serkan Borançılı, Tuncay Tütek;$1,172.00M;1,000-5,000
Razorpay;7,50 US$;India;;Bengaluru;Finance, Financial Services, FinTech, Payments;2013;Harshil Mathur, Shashank Kumar;$741.62M;1,000-5,000
Netskope;7,50 US$;United States;California;Santa Clara;Cloud Security, Cyber Security, Enterprise Software, Software;2012;Krishna Narayanaswamy, Lebin Cheng, Ravi Ithal, Sanjay Beri;$1,040.10M;1,000-5,000
Ola Cabs;7,50 US$;India;;Bengaluru;Apps, Mobile, Ride Sharing, Transportation;2011;Ankit Bhati, Bhavish Aggarwal;$5,008.30M;5,000-10,000
Carta;7,40 US$;United States;California;San Francisco;Finance, FinTech, Software, Stock Exchanges;2012;Henry Ward, Manu Kumar;$1,157.80M;1,000-5,000
Toss;7,40 US$;South Korea;;Seoul;Financial Services, FinTech, Mobile Apps, Mobile Payments, Personal Finance;2013;Seunggun Lee;$844.20M;500-1,000
Scale AI;7,30 US$;United States;California;San Francisco;Artificial Intelligence, Image Recognition, Machine Learning, SaaS;2016;Alexandr Wang, Lucy Guo;$602.82M;250-500
TripActions;7,25 US$;United States;California;Palo Alto;Business Travel, Customer Service, Payments, Software;2015;Ariel Cohen, Ilan Twig;$1,040.48M;1,000-5,000
Argo AI;7,25 US$;United States;Pennsylvania;Pittsburgh;Artificial Intelligence, Autonomous Vehicles, Robotics, Transportation;2016;Bryan Salesky, Peter Rander;$500.00M;1,000-5,000
Gong;7,25 US$;United States;California;Palo Alto;Artificial Intelligence, CRM, Enterprise Software, Information Technology, Machine Learning, Sales, Software;2015;Amit Bendov, Eilon Reshef;$583.00M;500-1,000
Gemini;7,10 US$;United States;New York;New York;Cryptocurrency, Finance, Financial Services, FinTech;2015;Cameron Winklevoss, Tyler Winklevoss;$400.00M;500-1,000
Discord;7,00 US$;United States;California;San Francisco;Communities, Messaging, Social Network, Software, Video Chat;2012;Jason Citron, Stanislav Vishnevskiy;$979.30M;500-1,000
We Doctor;7,00 US$;China;Zhejiang;Hangzhou;Health Care, Hospitality, Internet, Medical;2010;Liao Jieyuan;$1,786.00M;1,000-5,000
1Password;6,80 US$;Canada;Ontario;Toronto;Cyber Security, Network Security, Privacy, Software;2005;Dave Teare, Natalia Karimov, Roustem Karimov, Sara Teare;$920.14M;250-500
Automation Anywhere;6,80 US$;United States;California;San Jose;Artificial Intelligence, Enterprise Software, Machine Learning, SaaS, Software;2003;Ankur Kothari, Mihir Shukla, Neeti Mehta, Rushabh Parmani;$840.00M;1,000-5,000
Ziroom;6,60 US$;China;Beijing;Beijing;Real Estate, Rental, Rental Property;2011;Lin Xiong;$2,121.00M;10.000
National Stock Exchange of India;6,50 US$;India;;Mumbai;Financial Services, FinTech, Stock Exchanges;1992;Mukesh Agarwal;$149.50M;250-500
Mollie;6,50 US$;Netherlands;;Amsterdam;E-Commerce, Financial Services, FinTech, Mobile Payments;2004;Adriaan Mol;$934.32M;250-500
Rippling;6,50 US$;United States;California;San Francisco;Employment, Human Resources, Information Technology, IT Management, Productivity Tools;2017;Parker Conrad, Prasanna Sankar;$447.12M;500-1,000
DataRobot;6,30 US$;United States;Massachusetts;Boston;Artificial Intelligence, Enterprise Software, Machine Learning, SaaS;2012;Jeremy Achin, Thomas DeGodoy;$1,089.37M;1,000-5,000
Personio;6,30 US$;Germany;;Munich;Employment, Human Resources, Recruiting, SaaS;2015;Arseniy Vershinin, Hanno Renner, Ignaz Forstmeier, Ignaz Forstmeier, Roman Schumacher;$524.83M;500-1,000
Upgrade;6,28 US$;United States;California;San Francisco;Banking, Credit, Financial Services, FinTech, Mobile;2016;Adelina Grozdanova, Jeff Bogan, Matt Wierman, Renaud Laplanche, Soul Htite, Visar Nimani;$562.50M;250-500
Hinge Health;6,20 US$;United States;California;San Francisco;Health Care, Medical, Therapeutics, Wearables;2015;Daniel Perez, Gabriel Mecklenburg;$853.85M;1,000-5,000
Benchling;6,10 US$;United States;California;San Francisco;Biotechnology, Life Science, Software;2012;Ashutosh Singhal, Cory Li, Sajith Wickramasekara;$412.00M;500-1,000
Black Unicorn Factory;6,10 US$;United States;California;Los Angeles;;2020;Johnny Stewart;$645M;No Data
Better.com;6,00 US$;United States;New York;New York;Consumer Lending, Financial Services, FinTech, Lending, Real Estate;2016;Eric Wilson, Erik Bernhardsson, Shawn Low, Viral Shah, Vishal Garg;$1,655.00M;5,000-10,000
Wiz;6,00 US$;Israel;;Tel Aviv;Cloud Security, Cyber Security, Enterprise Software, Security;2020;Ami Luttwak, Assaf Rappaport, Roy Reznik, Yinon Costica;$600.00M;100-250
iCapital Network;6,00 US$;United States;New York;New York;Asset Management, Banking, Financial Services, FinTech;2013;Dan Vene, John Robertshaw, Nick Veronis, Phil Pool;$181.50M;250-500
1 Company Valuation Country State City Industries FoundedYear Name of Founders TotalFunding Number of Employees
2 Bytedance 140,00 US$ China Beijing Beijing Content, Data Mining, Internet 2012 Yiming Zhang $7,440.00M 10.000
3 SpaceX 100,30 US$ United States California Hawthorne Aerospace, Manufacturing, Space Travel, Transportation 2002 Elon Musk $383.02M 5,000-10,000
4 Stripe 95,00 US$ United States California San Francisco Finance, FinTech, Mobile Payments, SaaS 2010 John Collison, Patrick Collison $300.00M 1,000-5,000
5 Klarna 45,60 US$ Sweden Stockholm E-Commerce, FinTech, Payments, Shopping 2005 Niklas Adalberth, Sebastian Siemiatkowski, Victor Jacobsson $3,471.72M 5,000-10,000
6 Epic Games 42,00 US$ United States North Carolina Cary Developer Platform, Gaming, Software, Video Games 1991 Mark Rein, Tim Sweeney $544.93M 1,000-5,000
7 Canva 40,00 US$ Australia New South Wales Surry Hills Graphic Design, Photo Editing, Publishing, Software, Web Design 2012 Cameron Adams, Cliff Obrecht, Melanie Perkins $571.26M 500-1,000
8 Checkout.com 40,00 US$ United Kingdom England London E-Commerce, FinTech, Payments, Transaction Processing 2012 Guillaume Pousaz $1,830.00M 1,000-5,000
9 Instacart 39,00 US$ United States California San Francisco Delivery Service, E-Commerce, Grocery, Shopping 2012 Apoorva Mehta, Brandon Leonardo, Max Mullen $2,686.01M 5,000-10,000
10 Databricks 38,00 US$ United States California San Francisco Analytics, Artificial Intelligence, Information Technology, Machine Learning, Software 2013 Ali Ghodsi, Andy Konwinski, Ion Stoica, Matei Zaharia, Patrick Wendell, Reynold Xin, Scott Shenker $557.15M 1,000-5,000
11 Revolut 33,00 US$ United Kingdom England London Banking, Financial Services, FinTech, Mobile Payments 2015 Nikolay Storonsky, Vlad Yatsenko $1,715.98M 1,000-5,000
12 FTX 32,00 US$ Bahamas Cryptocurrency, Finance, Financial Exchanges, Financial Services, Trading Platform 2018 Gary Wang, Sam Bankman-Fried $1,828.69M 100-250
13 Chime 25,00 US$ United States California San Francisco Banking, Debit Cards, Financial Services, FinTech 2013 Chris Britt, Ryan King $3,396.75M 1,000-5,000
14 BYJU's 21,00 US$ India Bengaluru E-Learning, EdTech, Education, Higher Education, Software 2008 Byju Raveendran, Divya Gokulnath $5,182.78M 1,000-5,000
15 J&T Express 20,00 US$ Indonesia Jakarta Courier Service, E-Commerce, Freight Service 2015 Jet Lee, Tony Chen $4,653.00M 10.000
16 Xiaohongshu 20,00 US$ China Shanghai Shanghai E-Commerce, Mobile Apps, Shopping, Social 2013 Charlwin Mao Wenchao, Miranda Qu $917.50M 1,000-5,000
17 Fanatics 18,00 US$ United States Florida Jacksonville Manufacturing, Retail, Sporting Goods, Sports 1995 Alan Trager, Michael G. Rubin, Mitch Trager $1,170.29M 1,000-5,000
18 Miro 17,50 US$ United States California San Francisco B2B, Enterprise Applications, Enterprise Software, Product Management, UX Design 2011 Andrey Khusid, Oleg Shardin, Oleg Shardin $355.00M 1,000-5,000
19 Yuanfudao 15,50 US$ China Beijing Beijing E-Learning, EdTech, Tutoring 2012 Ke Shuai, Xin Li, Yong Li $4,044.20M 10.000
20 Ripple 15,00 US$ United States California San Francisco Blockchain, Cryptocurrency, FinTech, Internet, Payments 2012 Arthur Britto, Chris Larsen, Jed McCaleb, Ryan Fugger $293.90M 500-1,000
21 DJI Innovations 15,00 US$ China Guangdong Shenzhen Aerospace, Consumer Electronics, Drones, Manufacturing, Photography, Wireless 2006 Frank Wang $1,135.00M 10.000
22 goPuff 15,00 US$ United States Pennsylvania Philadelphia Delivery Service, E-Commerce, Food Delivery, Grocery, Mobile Apps 2013 Rafael Ilishayev, Yakir Gola $290.88M 5,000-10,000
23 SHEIN 15,00 US$ China Guangdong Shenzhen Consumer, E-Commerce, Fashion, Marketplace, Textiles 2008 Xiaoqing Ren, Yang Pei, Yangtian Xu $553.36M 1,000-5,000
24 Yuanqi Senlin 15,00 US$ China Beijing Beijing Food and Beverage 2016 Binsen Tang $721.31M 5,000-10,000
25 Plaid 13,40 US$ United States California San Francisco Finance, Financial Services, FinTech 2012 William Hockey, Zachary Perret $734.80M 500-1,000
26 OpenSea 13,30 US$ United States New York New York Blockchain, Cryptocurrency, Marketplace 2017 Alex Atallah, Devin Finzer $425.12M 100-250
27 Grammarly 13,00 US$ United States California San Francisco Assistive Technology, Information Technology, Productivity Tools 2009 Alex Shevchenko, Dmytro Lider, Max Lytvyn $400.00M 500-1,000
28 Devoted Health 12,60 US$ United States Minnesota Saint Paul Elder Care, Elderly, Health Care, Hospital 2017 Ed Park, Jeremy Delinsky, Todd Park $1,968.95M 1,000-5,000
29 Faire 12,40 US$ United States California San Francisco E-Commerce, Marketplace, Retail, Retail Technology, Wholesale 2017 Daniele Perito, Jeffrey Kolovson, Lauren Cooks Levitan, Marcelo Cortes, Max Rhodes $1,096.12M 500-1,000
30 Brex 12,30 US$ United States Utah Draper Banking, Credit Cards, Financial Services, FinTech 2017 Henrique Dubugras, Pedro Franceschi $1,490.12M 500-1,000
31 Biosplice Therapeutics 12,00 US$ United States California San Diego Biotechnology, Health Care, Life Science 2008 Osman Kibar $285.71M 50-100
32 Bitmain Technologies 12,00 US$ China Beijing Beijing Application Specific Integrated Circuit (ASIC), Bitcoin, Electronics, Manufacturing, Semiconductor 2013 Jihan Wu, Micree Zhan $450.00M 100-250
33 GoodLeap 12,00 US$ United States California Roseville Lending, Renewable Energy 2003 Hayes Barnard $1,800.00M 1,000-5,000
34 JUUL Labs 12,00 US$ United States California San Francisco B2C, Consumer Electronics, Consumer Goods, Leisure, Lifestyle 2015 Adam Bowen, James Monsees, Kevin Burns, Tim Danaher $15,371.68M 1,000-5,000
35 Airtable 11,70 US$ United States California San Francisco Collaboration, Database, Developer Tools, SaaS 2013 Andrew Ofstad, Emmett Nicholas, Howie Liu $2,236.60M 250-500
36 ZongMu Technology 11,40 US$ China Shanghai Shanghai Automotive, Autonomous Vehicles, Robotics 2013 Rui Tang $210.83M 10-50
37 Global Switch 11,10 US$ United Kingdom England London Data Center, Real Estate, Wholesale 1998 Andy Ruhan $6,254.75M 250-500
38 Bolt 11,00 US$ United States California San Francisco E-Commerce, Fraud Detection, Mobile Payments, Payments 2014 Eric Feldman, Ryan Breslow $963.00M 500-1,000
39 Celonis 11,00 US$ Germany Munich Analytics, Business Intelligence, SaaS, Software 2011 Alexander Rinke, Bastian Nominacher, Martin Klenk $1,367.50M 1,000-5,000
40 Weilong 10,88 US$ China Henan Luohe Food and Beverage, Manufacturing, Snack Food 1999 Liu Fuping, Liu Weiping $559.74M No Data
41 Swiggy 10,70 US$ India Bengaluru E-Commerce Platforms, Food Delivery, Mobile Apps 2014 Nandan Reddy, Phani Kishan Addepalli, Rahul Jaimini, Sriharsha Majety $3,571.00M 10.000
42 Figma 10,00 US$ United States California San Francisco Developer Tools, Graphic Design, Software, UX Design, Web Design 2012 Dylan Field, Evan Wallace $333.50M 250-500
43 Talkdesk 10,00 US$ United States California San Francisco Cloud Computing, CRM, Customer Service, SaaS 2011 Cristina Fonseca, Tiago Paiva $504.77M 1,000-5,000
44 Digital Currency Group 10,00 US$ United States New York New York Bitcoin, Blockchain, Financial Services, Venture Capital 2015 Barry Silbert $600.00M 50-100
45 Gusto 10,00 US$ United States California San Francisco Employee Benefits, Enterprise Software, Financial Services, FinTech, Human Resources, SaaS 2011 Edward Kim, Joshua Reeves, Tomer London $930.83M 1,000-5,000
46 Lalamove 10,00 US$ Hong Kong Cheung Sha Wan Apps, Delivery, Logistics, Supply Chain Management, Transportation 2013 Chow Shing Yuk, Gary Hui, Santit Jirawongkraisorn $2,475.00M 500-1,000
47 Notion Labs 10,00 US$ United States California San Francisco Apps, Collaboration, Product Management, Real Time, Software 2016 Ivan Zhao, Simon Last $342.00M 250-500
48 reddit 10,00 US$ United States California San Francisco Content, News, Social Bookmarking, Social Media, Social Network 2005 Aaron Swartz, Alexis Ohanian, Steve Huffman $1,487.23M 500-1,000
49 Thrasio 10,00 US$ United States Massachusetts Walpole Brand Marketing, Consumer Goods, E-Commerce 2018 Carlos Cashman, Joshua Silberstein $3,396.46M 1,000-5,000
50 OYO Rooms 9,60 US$ India Gurugram Hospitality, Travel, Travel Accommodations 2012 Ritesh Agarwal $3,113.68M 5,000-10,000
51 OutSystems 9,50 US$ United States Massachusetts Boston Data Integration, Developer Platform, Developer Tools, PaaS, SaaS, Software 2001 Paulo Rosado, Rui Pereira $208.00M 1,000-5,000
52 ServiceTitan 9,50 US$ United States California Glendale CRM, Home Services, Information Technology, SaaS 2012 Ara Mahdessian, Vahe Kuzoyan $1,098.84M 1,000-5,000
53 HEYTEA 9,28 US$ China Guangdong Shenzhen Food and Beverage, Tea 2012 Yunqi Nie $579.23M 1,000-5,000
54 N26 9,23 US$ Germany Berlin Banking, Finance, Financial Services, FinTech 2013 Maximilian Tayenthal, Valentin Stalf $1,722.36M 1,000-5,000
55 Klaviyo 9,20 US$ United States Massachusetts Boston Advertising, Analytics, E-Commerce, Marketing, Marketing Automation, Software 2012 Andrew Bialecki, Ed Hallen $678.50M 500-1,000
56 Northvolt 9,08 US$ Sweden Stockholm Battery, Clean Energy, CleanTech, Electronics, Manufacturing 2016 Paolo Cerruti, Peter Carlsson $6,162.15M 1,000-5,000
57 Chehaoduo 9,00 US$ China Beijing Beijing Automotive, E-Commerce, Online Auctions 2015 Mark Yang $696.76M 10.000
58 Niantic 9,00 US$ United States California San Francisco Augmented Reality, Software, Video Games, Virtual Reality 2015 John Hanke, Phil Keslin $770.00M 500-1,000
59 Tanium 9,00 US$ United States Washington Kirkland Cyber Security, Enterprise Software, Information Technology, SaaS, Security 2007 David Hindawi, Orion Hindawi $4,376.50M 1,000-5,000
60 Rapyd 8,75 US$ United Kingdom England London Financial Services, FinTech, Mobile Payments, Payments 2016 Arik Shtilman, Arkady Karpman, Omer Priel $775.00M 250-500
61 Kavak 8,70 US$ Mexico Lerma de Villada Automotive, E-Commerce, E-Commerce Platforms, Online Portals 2016 Carlos Julio Garcia, Roger Laughlin $1,188.00M 1,000-5,000
62 Nuro 8,60 US$ United States California Mountain View Autonomous Vehicles, Fleet Management, Information Technology, Robotics, Transportation 2016 Dave Ferguson, Jiajun Zhu $2,132.00M 1,000-5,000
63 Snyk 8,60 US$ United States Massachusetts Boston Cyber Security, Internet, Security, Software 2015 Assaf Hefetz, Danny Grander, Guy Podjarny, Jacob Tarango $1,026.09M 500-1,000
64 Bolt 8,40 US$ Estonia Tallinn Car Sharing, Electric Vehicle, Food Delivery, Grocery, Last Mile Transportation, Mobile Apps, Public Transportation, Ride Sharing, Transportation 2013 Markus Villig, Martin Villig, Oliver Leisalu $1,970.53M 1,000-5,000
65 Tipalti 8,30 US$ United States California San Mateo Accounting, Financial Services, FinTech, Payments, Software 2010 Chen Amit, Oren Zeev $502.50M 500-1,000
66 Lacework 8,30 US$ United States California San Jose Cloud Security, Compliance, Cyber Security, Developer Tools 2015 Mike Speiser, Sanjay Kalra, Vikram Kapoor $1,906.70M 500-1,000
67 Tempus 8,10 US$ United States Illinois Chicago Artificial Intelligence, Biotechnology, Health Care, Machine Learning, Medical 2015 Eric Lefkofsky $1,070.00M 1,000-5,000
68 Fireblocks 8,00 US$ United States New York New York Blockchain, Cryptocurrency, Cyber Security 2018 Idan Ofrat, Michael Shaulov, Pavel Berengoltz $1,039.00M 100-250
69 Dream11 8,00 US$ India Mumbai Fantasy Sports, Information Technology, Sports 2007 Bhavit Sheth, Harsh Jain $1,165.08M 500-1,000
70 Xingsheng Selected 8,00 US$ China Hunan Changsha E-Commerce, Food and Beverage, Grocery, Retail, Shopping 2009 Lihua Yue $5,040.00M 10.000
71 Caris Life Sciences 7,83 US$ United States Texas Irving Biotechnology, Health Care, Health Diagnostics, Medical 1996 David D. Halbert $1,314.49M 1,000-5,000
72 Hopin 7,75 US$ United Kingdom England London Events, Meeting Software, Video Conferencing 2019 Johnny Boufarhat $1,021.73M 500-1,000
73 Dapper Labs 7,60 US$ Canada British Columbia Vancouver Blockchain, Gaming, Software 2018 Dieter Shirley, Mack Flavelle, Roham Gharegozlou $665.07M 100-250
74 Getir 7,50 US$ Turkey Istanbul Delivery Service, E-Commerce, Logistics, Mobile Apps 2015 Arkady Volozh, Mert Salur, Nazım Salur, Serkan Borançılı, Tuncay Tütek $1,172.00M 1,000-5,000
75 Razorpay 7,50 US$ India Bengaluru Finance, Financial Services, FinTech, Payments 2013 Harshil Mathur, Shashank Kumar $741.62M 1,000-5,000
76 Netskope 7,50 US$ United States California Santa Clara Cloud Security, Cyber Security, Enterprise Software, Software 2012 Krishna Narayanaswamy, Lebin Cheng, Ravi Ithal, Sanjay Beri $1,040.10M 1,000-5,000
77 Ola Cabs 7,50 US$ India Bengaluru Apps, Mobile, Ride Sharing, Transportation 2011 Ankit Bhati, Bhavish Aggarwal $5,008.30M 5,000-10,000
78 Carta 7,40 US$ United States California San Francisco Finance, FinTech, Software, Stock Exchanges 2012 Henry Ward, Manu Kumar $1,157.80M 1,000-5,000
79 Toss 7,40 US$ South Korea Seoul Financial Services, FinTech, Mobile Apps, Mobile Payments, Personal Finance 2013 Seunggun Lee $844.20M 500-1,000
80 Scale AI 7,30 US$ United States California San Francisco Artificial Intelligence, Image Recognition, Machine Learning, SaaS 2016 Alexandr Wang, Lucy Guo $602.82M 250-500
81 TripActions 7,25 US$ United States California Palo Alto Business Travel, Customer Service, Payments, Software 2015 Ariel Cohen, Ilan Twig $1,040.48M 1,000-5,000
82 Argo AI 7,25 US$ United States Pennsylvania Pittsburgh Artificial Intelligence, Autonomous Vehicles, Robotics, Transportation 2016 Bryan Salesky, Peter Rander $500.00M 1,000-5,000
83 Gong 7,25 US$ United States California Palo Alto Artificial Intelligence, CRM, Enterprise Software, Information Technology, Machine Learning, Sales, Software 2015 Amit Bendov, Eilon Reshef $583.00M 500-1,000
84 Gemini 7,10 US$ United States New York New York Cryptocurrency, Finance, Financial Services, FinTech 2015 Cameron Winklevoss, Tyler Winklevoss $400.00M 500-1,000
85 Discord 7,00 US$ United States California San Francisco Communities, Messaging, Social Network, Software, Video Chat 2012 Jason Citron, Stanislav Vishnevskiy $979.30M 500-1,000
86 We Doctor 7,00 US$ China Zhejiang Hangzhou Health Care, Hospitality, Internet, Medical 2010 Liao Jieyuan $1,786.00M 1,000-5,000
87 1Password 6,80 US$ Canada Ontario Toronto Cyber Security, Network Security, Privacy, Software 2005 Dave Teare, Natalia Karimov, Roustem Karimov, Sara Teare $920.14M 250-500
88 Automation Anywhere 6,80 US$ United States California San Jose Artificial Intelligence, Enterprise Software, Machine Learning, SaaS, Software 2003 Ankur Kothari, Mihir Shukla, Neeti Mehta, Rushabh Parmani $840.00M 1,000-5,000
89 Ziroom 6,60 US$ China Beijing Beijing Real Estate, Rental, Rental Property 2011 Lin Xiong $2,121.00M 10.000
90 National Stock Exchange of India 6,50 US$ India Mumbai Financial Services, FinTech, Stock Exchanges 1992 Mukesh Agarwal $149.50M 250-500
91 Mollie 6,50 US$ Netherlands Amsterdam E-Commerce, Financial Services, FinTech, Mobile Payments 2004 Adriaan Mol $934.32M 250-500
92 Rippling 6,50 US$ United States California San Francisco Employment, Human Resources, Information Technology, IT Management, Productivity Tools 2017 Parker Conrad, Prasanna Sankar $447.12M 500-1,000
93 DataRobot 6,30 US$ United States Massachusetts Boston Artificial Intelligence, Enterprise Software, Machine Learning, SaaS 2012 Jeremy Achin, Thomas DeGodoy $1,089.37M 1,000-5,000
94 Personio 6,30 US$ Germany Munich Employment, Human Resources, Recruiting, SaaS 2015 Arseniy Vershinin, Hanno Renner, Ignaz Forstmeier, Ignaz Forstmeier, Roman Schumacher $524.83M 500-1,000
95 Upgrade 6,28 US$ United States California San Francisco Banking, Credit, Financial Services, FinTech, Mobile 2016 Adelina Grozdanova, Jeff Bogan, Matt Wierman, Renaud Laplanche, Soul Htite, Visar Nimani $562.50M 250-500
96 Hinge Health 6,20 US$ United States California San Francisco Health Care, Medical, Therapeutics, Wearables 2015 Daniel Perez, Gabriel Mecklenburg $853.85M 1,000-5,000
97 Benchling 6,10 US$ United States California San Francisco Biotechnology, Life Science, Software 2012 Ashutosh Singhal, Cory Li, Sajith Wickramasekara $412.00M 500-1,000
98 Black Unicorn Factory 6,10 US$ United States California Los Angeles 2020 Johnny Stewart $645M No Data
99 Better.com 6,00 US$ United States New York New York Consumer Lending, Financial Services, FinTech, Lending, Real Estate 2016 Eric Wilson, Erik Bernhardsson, Shawn Low, Viral Shah, Vishal Garg $1,655.00M 5,000-10,000
100 Wiz 6,00 US$ Israel Tel Aviv Cloud Security, Cyber Security, Enterprise Software, Security 2020 Ami Luttwak, Assaf Rappaport, Roy Reznik, Yinon Costica $600.00M 100-250
101 iCapital Network 6,00 US$ United States New York New York Asset Management, Banking, Financial Services, FinTech 2013 Dan Vene, John Robertshaw, Nick Veronis, Phil Pool $181.50M 250-500

View File

@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@ -108,7 +108,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -195,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -311,7 +311,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -391,7 +391,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -433,7 +433,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -476,7 +476,7 @@
"<Axes: title={'center': 'Population 2020'}, xlabel='Continent'>"
]
},
"execution_count": 13,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
@ -543,7 +543,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -552,7 +552,7 @@
"<Axes: ylabel='Frequency'>"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
@ -580,7 +580,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -610,7 +610,7 @@
"<Axes: xlabel='Country (or dependency)', ylabel='Population 2020'>"
]
},
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
@ -639,7 +639,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -656,7 +656,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [
{

File diff suppressed because it is too large Load Diff

723
lec2unicorns.ipynb Normal file
View File

@ -0,0 +1,723 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Загрузка данных в DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 100 entries, Bytedance to iCapital Network\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Valuation 100 non-null object\n",
" 1 Country 100 non-null object\n",
" 2 State 79 non-null object\n",
" 3 City 99 non-null object\n",
" 4 Industries 99 non-null object\n",
" 5 FoundedYear 100 non-null int64 \n",
" 6 Name of Founders 100 non-null object\n",
" 7 TotalFunding 100 non-null object\n",
" 8 Number of Employees 100 non-null object\n",
"dtypes: int64(1), object(8)\n",
"memory usage: 7.8+ KB\n",
"(100, 10)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Valuation</th>\n",
" <th>Country</th>\n",
" <th>State</th>\n",
" <th>City</th>\n",
" <th>Industries</th>\n",
" <th>FoundedYear</th>\n",
" <th>Name of Founders</th>\n",
" <th>TotalFunding</th>\n",
" <th>Number of Employees</th>\n",
" <th>IsChina</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Company</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Bytedance</th>\n",
" <td>140.0</td>\n",
" <td>China</td>\n",
" <td>Beijing</td>\n",
" <td>Beijing</td>\n",
" <td>Content, Data Mining, Internet</td>\n",
" <td>2012</td>\n",
" <td>Yiming Zhang</td>\n",
" <td>7440.00</td>\n",
" <td>10.000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SpaceX</th>\n",
" <td>100.3</td>\n",
" <td>United States</td>\n",
" <td>California</td>\n",
" <td>Hawthorne</td>\n",
" <td>Aerospace, Manufacturing, Space Travel, Transp...</td>\n",
" <td>2002</td>\n",
" <td>Elon Musk</td>\n",
" <td>383.02</td>\n",
" <td>5,000-10,000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Stripe</th>\n",
" <td>95.0</td>\n",
" <td>United States</td>\n",
" <td>California</td>\n",
" <td>San Francisco</td>\n",
" <td>Finance, FinTech, Mobile Payments, SaaS</td>\n",
" <td>2010</td>\n",
" <td>John Collison, Patrick Collison</td>\n",
" <td>300.00</td>\n",
" <td>1,000-5,000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Klarna</th>\n",
" <td>45.6</td>\n",
" <td>Sweden</td>\n",
" <td>NaN</td>\n",
" <td>Stockholm</td>\n",
" <td>E-Commerce, FinTech, Payments, Shopping</td>\n",
" <td>2005</td>\n",
" <td>Niklas Adalberth, Sebastian Siemiatkowski, Vic...</td>\n",
" <td>3471.72</td>\n",
" <td>5,000-10,000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Epic Games</th>\n",
" <td>42.0</td>\n",
" <td>United States</td>\n",
" <td>North Carolina</td>\n",
" <td>Cary</td>\n",
" <td>Developer Platform, Gaming, Software, Video Games</td>\n",
" <td>1991</td>\n",
" <td>Mark Rein, Tim Sweeney</td>\n",
" <td>544.93</td>\n",
" <td>1,000-5,000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Valuation Country State City \\\n",
"Company \n",
"Bytedance 140.0 China Beijing Beijing \n",
"SpaceX 100.3 United States California Hawthorne \n",
"Stripe 95.0 United States California San Francisco \n",
"Klarna 45.6 Sweden NaN Stockholm \n",
"Epic Games 42.0 United States North Carolina Cary \n",
"\n",
" Industries FoundedYear \\\n",
"Company \n",
"Bytedance Content, Data Mining, Internet 2012 \n",
"SpaceX Aerospace, Manufacturing, Space Travel, Transp... 2002 \n",
"Stripe Finance, FinTech, Mobile Payments, SaaS 2010 \n",
"Klarna E-Commerce, FinTech, Payments, Shopping 2005 \n",
"Epic Games Developer Platform, Gaming, Software, Video Games 1991 \n",
"\n",
" Name of Founders TotalFunding \\\n",
"Company \n",
"Bytedance Yiming Zhang 7440.00 \n",
"SpaceX Elon Musk 383.02 \n",
"Stripe John Collison, Patrick Collison 300.00 \n",
"Klarna Niklas Adalberth, Sebastian Siemiatkowski, Vic... 3471.72 \n",
"Epic Games Mark Rein, Tim Sweeney 544.93 \n",
"\n",
" Number of Employees IsChina \n",
"Company \n",
"Bytedance 10.000 1 \n",
"SpaceX 5,000-10,000 0 \n",
"Stripe 1,000-5,000 0 \n",
"Klarna 5,000-10,000 0 \n",
"Epic Games 1,000-5,000 0 "
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"\n",
"def parse_valuation(raw):\n",
"    \"\"\"Convert a valuation string such as '140,00 US$' to a float (140.0).\"\"\"\n",
"    # Cut the trailing 4 characters, then swap the decimal comma for a point.\n",
"    return float(raw[:-4].replace(',', '.'))\n",
"\n",
"\n",
"def parse_funding(raw):\n",
"    \"\"\"Convert a funding string such as '$7,440.00M' to a float (7440.0).\"\"\"\n",
"    # Trim '$' / 'M' from both ends and remove the thousands separators.\n",
"    return float(raw.strip(\"$M\").replace(\",\", \"\"))\n",
"\n",
"\n",
"df = pd.read_csv(\"data/unicorns.csv\", sep=';', index_col=\"Company\")\n",
"\n",
"# Column summary of the raw data, printed before any conversion.\n",
"df.info()\n",
"\n",
"df[\"Valuation\"] = df[\"Valuation\"].map(parse_valuation)\n",
"df[\"TotalFunding\"] = df[\"TotalFunding\"].map(parse_funding)\n",
"\n",
"# Binary label: 1 when Country equals 'China', 0 otherwise.\n",
"df[\"IsChina\"] = df[\"Country\"].eq('China').astype(\"int64\")\n",
"print(df.shape)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о пропущенных данных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Типы пропущенных данных:\n",
"- None - представление пустых данных в Python\n",
"- NaN - представление пустых данных в Pandas\n",
"- '' - пустая строка"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Valuation 0\n",
"Country 0\n",
"State 21\n",
"City 1\n",
"Industries 1\n",
"FoundedYear 0\n",
"Name of Founders 0\n",
"TotalFunding 0\n",
"Number of Employees 0\n",
"IsChina 0\n",
"dtype: int64\n",
"\n",
"Valuation False\n",
"Country False\n",
"State True\n",
"City True\n",
"Industries True\n",
"FoundedYear False\n",
"Name of Founders False\n",
"TotalFunding False\n",
"Number of Employees False\n",
"IsChina False\n",
"dtype: bool\n",
"\n",
"State процент пустых значений: %21.00\n",
"City процент пустых значений: %1.00\n",
"Industries процент пустых значений: %1.00\n"
]
}
],
"source": [
"# Number of missing values per feature\n",
"missing_counts = df.isnull().sum()\n",
"print(missing_counts)\n",
"\n",
"print()\n",
"\n",
"# Whether each feature contains at least one missing value\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Percentage of missing values, reported only for features that have some\n",
"n_rows = len(df)\n",
"for column, n_missing in missing_counts.items():\n",
"    null_rate = n_missing / n_rows * 100\n",
"    if null_rate > 0:\n",
"        print(f\"{column} процент пустых значений: %{null_rate:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Заполнение пропущенных данных\n",
"\n",
"https://pythonmldaily.com/posts/pandas-dataframes-search-drop-empty-values\n",
"\n",
"https://scales.arabpsychology.com/stats/how-to-fill-nan-values-with-median-in-pandas/"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): this cell is disabled - every statement below is commented out.\n",
"# The examples use an `Age` column, which is not among the columns that df.info()\n",
"# reports for unicorns.csv, so they cannot be re-enabled without adapting them.\n",
"\n",
"# fillna_df = df.fillna(0)\n",
"\n",
"# print(fillna_df.shape)\n",
"\n",
"# print(fillna_df.isnull().any())\n",
"\n",
"# # Replace missing values with 0\n",
"# df[\"AgeFillNA\"] = df[\"Age\"].fillna(0)\n",
"\n",
"# # Replace missing values with the median\n",
"# df[\"AgeFillMedian\"] = df[\"Age\"].fillna(df[\"Age\"].median())\n",
"\n",
"# df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): this cell is disabled - every statement below is commented out.\n",
"# It relies on an `Age` column, which df.info() does not list for unicorns.csv.\n",
"\n",
"# df[\"AgeCopy\"] = df[\"Age\"]\n",
"\n",
"# # Replace the values directly in the DataFrame, without creating a copy\n",
"# df.fillna({\"AgeCopy\": 0}, inplace=True)\n",
"\n",
"# df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление признаков (столбцов) с пропусками"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 7)\n",
"Valuation False\n",
"Country False\n",
"FoundedYear False\n",
"Name of Founders False\n",
"TotalFunding False\n",
"Number of Employees False\n",
"IsChina False\n",
"dtype: bool\n"
]
}
],
"source": [
"# Drop every column that has at least one missing value\n",
"# (axis=\"columns\" is the named form of axis=1; rows are left untouched).\n",
"df = df.dropna(axis=\"columns\", how=\"any\")\n",
"\n",
"print(df.shape)\n",
"\n",
"print(df.isnull().any())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание выборок данных\n",
"\n",
"Библиотека scikit-learn\n",
"\n",
"https://scikit-learn.org/stable/index.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"assets/lec2-split.png\" width=\"600\" style=\"background-color: white\">"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"# Функция для создания выборок\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"def split_stratified_into_train_val_test(\n",
"    df_input,\n",
"    stratify_colname=\"y\",\n",
"    frac_train=0.6,\n",
"    frac_val=0.15,\n",
"    frac_test=0.25,\n",
"    random_state=None,\n",
"):\n",
"    \"\"\"\n",
"    Split a pandas DataFrame into stratified train, validation and test subsets.\n",
"\n",
"    Each subset is stratified by the values in ``stratify_colname`` (that is,\n",
"    each subset has the same relative frequency of the values in that column).\n",
"    The split is performed by running train_test_split() twice: first train\n",
"    versus the rest, then the rest into validation versus test.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df_input : pandas.DataFrame\n",
"        Input dataframe to be split.\n",
"    stratify_colname : str\n",
"        Name of the column used for stratification. Usually this is the label.\n",
"    frac_train : float\n",
"    frac_val : float\n",
"    frac_test : float\n",
"        Fractions of the data assigned to each subset. They must sum to 1.0;\n",
"        the sum is compared with a floating-point tolerance, so 0.7 / 0.2 / 0.1\n",
"        is accepted even though it adds up to 0.9999999999999999 in binary floats.\n",
"    random_state : int, None, or RandomState instance\n",
"        Passed unchanged to both train_test_split() calls.\n",
"\n",
"    Returns\n",
"    -------\n",
"    df_train, df_val, df_test :\n",
"        Dataframes containing the three splits. All columns of df_input,\n",
"        including the stratification column, are kept.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If the fractions do not add up to 1.0, or if stratify_colname is not a\n",
"        column of df_input.\n",
"    \"\"\"\n",
"    import math  # local import keeps the function self-contained in its cell\n",
"\n",
"    # An exact `!= 1.0` test rejects valid inputs because of rounding error.\n",
"    if not math.isclose(frac_train + frac_val + frac_test, 1.0, abs_tol=1e-9):\n",
"        raise ValueError(\n",
"            \"fractions %f, %f, %f do not add up to 1.0\"\n",
"            % (frac_train, frac_val, frac_test)\n",
"        )\n",
"\n",
"    if stratify_colname not in df_input.columns:\n",
"        raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
"    X = df_input  # Contains all columns.\n",
"    y = df_input[\n",
"        [stratify_colname]\n",
"    ]  # Dataframe of just the column on which to stratify.\n",
"\n",
"    # Split original dataframe into train and temp dataframes.\n",
"    df_train, df_temp, y_train, y_temp = train_test_split(\n",
"        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
"    )\n",
"\n",
"    # Split the temp dataframe into val and test dataframes.\n",
"    # frac_test is re-expressed relative to the remaining (val + test) part.\n",
"    relative_frac_test = frac_test / (frac_val + frac_test)\n",
"    df_val, df_test, y_val, y_test = train_test_split(\n",
"        df_temp,\n",
"        y_temp,\n",
"        stratify=y_temp,\n",
"        test_size=relative_frac_test,\n",
"        random_state=random_state,\n",
"    )\n",
"\n",
"    # Sanity check: no row was lost or duplicated by the two-step split.\n",
"    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
"    return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IsChina\n",
"0 86\n",
"1 14\n",
"Name: count, dtype: int64\n",
"Обучающая выборка: (60, 3)\n",
"IsChina\n",
"0 52\n",
"1 8\n",
"Name: count, dtype: int64\n",
"Контрольная выборка: (20, 3)\n",
"IsChina\n",
"0 17\n",
"1 3\n",
"Name: count, dtype: int64\n",
"Тестовая выборка: (20, 3)\n",
"IsChina\n",
"0 17\n",
"1 3\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"# Class distribution of the label in the full dataset\n",
"print(df.IsChina.value_counts())\n",
"\n",
"data = df[[\"TotalFunding\", \"Valuation\", \"IsChina\"]].copy()\n",
"\n",
"# Fixed seed so that the same rows land in each subset on every run.\n",
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
"    data,\n",
"    stratify_colname=\"IsChina\",\n",
"    frac_train=0.60,\n",
"    frac_val=0.20,\n",
"    frac_test=0.20,\n",
"    random_state=42,\n",
")\n",
"\n",
"# Report the size and the class balance of every subset.\n",
"for title, subset in (\n",
"    (\"Обучающая выборка: \", df_train),\n",
"    (\"Контрольная выборка: \", df_val),\n",
"    (\"Тестовая выборка: \", df_test),\n",
"):\n",
"    print(title, subset.shape)\n",
"    print(subset.IsChina.value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Выборка с избытком (oversampling)\n",
"\n",
"https://www.blog.trainindata.com/oversampling-techniques-for-imbalanced-data/\n",
"\n",
"https://datacrayon.com/machine-learning/class-imbalance-and-oversampling/\n",
"\n",
"Выборка с недостатком (undersampling)\n",
"\n",
"https://machinelearningmastery.com/random-oversampling-and-undersampling-for-imbalanced-classification/\n",
"\n",
"Библиотека imbalanced-learn\n",
"\n",
"https://imbalanced-learn.org/stable/"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (60, 3)\n",
"IsChina\n",
"0 52\n",
"1 8\n",
"Name: count, dtype: int64\n",
"Обучающая выборка после oversampling: (105, 3)\n",
"IsChina\n",
"1 53\n",
"0 52\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>TotalFunding</th>\n",
" <th>Valuation</th>\n",
" <th>IsChina</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>208.000000</td>\n",
" <td>9.500000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4044.200000</td>\n",
" <td>15.500000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>447.120000</td>\n",
" <td>6.500000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2121.000000</td>\n",
" <td>6.600000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2686.010000</td>\n",
" <td>39.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>1306.334794</td>\n",
" <td>14.179790</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>1492.220325</td>\n",
" <td>10.610196</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>1125.438822</td>\n",
" <td>16.887502</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>103</th>\n",
" <td>1728.312129</td>\n",
" <td>7.708914</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>1785.708076</td>\n",
" <td>7.004370</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>105 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" TotalFunding Valuation IsChina\n",
"0 208.000000 9.500000 0\n",
"1 4044.200000 15.500000 1\n",
"2 447.120000 6.500000 0\n",
"3 2121.000000 6.600000 1\n",
"4 2686.010000 39.000000 0\n",
".. ... ... ...\n",
"100 1306.334794 14.179790 1\n",
"101 1492.220325 10.610196 1\n",
"102 1125.438822 16.887502 1\n",
"103 1728.312129 7.708914 1\n",
"104 1785.708076 7.004370 1\n",
"\n",
"[105 rows x 3 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"# Fixed seed: ADASYN generates the synthetic samples at random.\n",
"ada = ADASYN(random_state=42)\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.IsChina.value_counts())\n",
"\n",
"# The label must not be part of the feature matrix: fit_resample() takes the\n",
"# features as X and the target separately as y.\n",
"X_resampled, y_resampled = ada.fit_resample(  # type: ignore\n",
"    df_train.drop(columns=\"IsChina\"), df_train[\"IsChina\"]\n",
")\n",
"# Re-attach the label positionally so the result has the same columns as df_train.\n",
"df_train_adasyn = pd.DataFrame(X_resampled).assign(\n",
"    IsChina=pd.Series(y_resampled).to_numpy()\n",
")\n",
"\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn.IsChina.value_counts())\n",
"\n",
"df_train_adasyn"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.

Binary file not shown.