Population analysis cases
- Requirements:
- Import the file and view the original data
- Summarize population data and state abbreviation data
- Delete the duplicate abbreviation column in the merged data
- View the columns with missing data in the summarized data
- Find out which states/regions cause the state value to be NaN in the merged data, and deduplicate them
- Fill in the correct values for the state items of these states / regions found, so as to remove all NaN in the state column
- The summarized data and the area data of each state are summarized
- Find the missing data in the area (sq. mi) column and identify which rows contain it
- Remove rows with missing data
- Find the population data for 2010
- Calculate the population density of each state
import pandas as pd
import numpy as np
# Mapping of each US state's full name to its two-letter abbreviation
# (columns: state, abbreviation).
abb = pd.read_csv('./state-abbrevs.csv')
abb
| state | abbreviation |
---|
0 | Alabama | AL |
---|
1 | Alaska | AK |
---|
2 | Arizona | AZ |
---|
3 | Arkansas | AR |
---|
4 | California | CA |
---|
5 | Colorado | CO |
---|
6 | Connecticut | CT |
---|
7 | Delaware | DE |
---|
8 | District of Columbia | DC |
---|
9 | Florida | FL |
---|
10 | Georgia | GA |
---|
11 | Hawaii | HI |
---|
12 | Idaho | ID |
---|
13 | Illinois | IL |
---|
14 | Indiana | IN |
---|
15 | Iowa | IA |
---|
16 | Kansas | KS |
---|
17 | Kentucky | KY |
---|
18 | Louisiana | LA |
---|
19 | Maine | ME |
---|
20 | Montana | MT |
---|
21 | Nebraska | NE |
---|
22 | Nevada | NV |
---|
23 | New Hampshire | NH |
---|
24 | New Jersey | NJ |
---|
25 | New Mexico | NM |
---|
26 | New York | NY |
---|
27 | North Carolina | NC |
---|
28 | North Dakota | ND |
---|
29 | Ohio | OH |
---|
30 | Oklahoma | OK |
---|
31 | Oregon | OR |
---|
32 | Maryland | MD |
---|
33 | Massachusetts | MA |
---|
34 | Michigan | MI |
---|
35 | Minnesota | MN |
---|
36 | Mississippi | MS |
---|
37 | Missouri | MO |
---|
38 | Pennsylvania | PA |
---|
39 | Rhode Island | RI |
---|
40 | South Carolina | SC |
---|
41 | South Dakota | SD |
---|
42 | Tennessee | TN |
---|
43 | Texas | TX |
---|
44 | Utah | UT |
---|
45 | Vermont | VT |
---|
46 | Virginia | VA |
---|
47 | Washington | WA |
---|
48 | West Virginia | WV |
---|
49 | Wisconsin | WI |
---|
50 | Wyoming | WY |
---|
# Population data: state/region (state abbreviation), ages (age group),
# year, population.
pop = pd.read_csv('./state-population.csv')
# Area data: state (full name), area (sq. mi).
area = pd.read_csv('./state-areas.csv')
# Outer merge keeps rows whose abbreviation has no match in `abb`
# (e.g. USA, PR); those rows get NaN in the 'state' column.
abb_pop = pd.merge(left=abb,right=pop,left_on='abbreviation',right_on='state/region',how='outer')
abb_pop
| state | abbreviation | state/region | ages | year | population |
---|
0 | Alabama | AL | AL | under18 | 2012 | 1117489.0 |
---|
1 | Alabama | AL | AL | total | 2012 | 4817528.0 |
---|
2 | Alabama | AL | AL | under18 | 2010 | 1130966.0 |
---|
3 | Alabama | AL | AL | total | 2010 | 4785570.0 |
---|
4 | Alabama | AL | AL | under18 | 2011 | 1125763.0 |
---|
... | ... | ... | ... | ... | ... | ... |
---|
2539 | NaN | NaN | USA | total | 2010 | 309326295.0 |
---|
2540 | NaN | NaN | USA | under18 | 2011 | 73902222.0 |
---|
2541 | NaN | NaN | USA | total | 2011 | 311582564.0 |
---|
2542 | NaN | NaN | USA | under18 | 2012 | 73708179.0 |
---|
2543 | NaN | NaN | USA | total | 2012 | 313873685.0 |
---|
2544 rows × 6 columns
# Drop the 'abbreviation' column: after the merge it duplicates 'state/region'.
abb_pop.drop(labels='abbreviation',axis=1,inplace=True)
# For each column, report whether it contains any missing values.
abb_pop.isnull().any(axis=0)
state True
state/region False
ages False
year False
population True
dtype: bool
# Find which states/regions have NaN in 'state', then deduplicate them.
# 1. Boolean mask of null values in the state column
ex = abb_pop['state'].isnull()
# 2. Rows where state is null
abb_pop.loc[ex]
# 3. Abbreviations (state/region) of those rows
abb_pop.loc[ex]['state/region']
# 4. Deduplicate the abbreviations from step 3
abb_pop.loc[ex]['state/region'].unique()
array(['PR', 'USA'], dtype=object)
# Fill in the correct full names for the rows whose 'state' is NaN,
# so that no NaN remains in the state column.
# Rows with abbreviation 'USA': write the country's full name.
usa_mask = abb_pop['state/region'] == 'USA'
abb_pop.loc[usa_mask, 'state'] = 'United States'  # fixed: was the typo 'United State'
# Rows with abbreviation 'PR': write the real territory name instead of
# a placeholder — state-areas.csv presumably lists 'Puerto Rico', so this
# also lets those rows pick up an area in the later merge (TODO confirm).
pr_mask = abb_pop['state/region'] == 'PR'
abb_pop.loc[pr_mask, 'state'] = 'Puerto Rico'  # fixed: was the placeholder 'PPPRRR'
# Verify the fill worked: count of remaining nulls in 'state' (expect 0).
abb_pop['state'].isnull().sum()
# Merge the population table with each state's area, joining on the
# full state name; outer join keeps unmatched rows (NaN area).
abb_pop_area = pd.merge(left=abb_pop,right=area,on='state',how='outer')
abb_pop_area.head()
| state | state/region | ages | year | population | area (sq. mi) |
---|
0 | Alabama | AL | under18 | 2012.0 | 1117489.0 | 52423.0 |
---|
1 | Alabama | AL | total | 2012.0 | 4817528.0 | 52423.0 |
---|
2 | Alabama | AL | under18 | 2010.0 | 1130966.0 | 52423.0 |
---|
3 | Alabama | AL | total | 2010.0 | 4785570.0 | 52423.0 |
---|
4 | Alabama | AL | under18 | 2011.0 | 1125763.0 | 52423.0 |
---|
# Find the rows whose 'area (sq. mi)' is missing, so they can be removed.
# Boolean mask of missing area values
abb_pop_area['area (sq. mi)'].isnull()
# Rows where the area column is null
abb_pop_area.loc[abb_pop_area['area (sq. mi)'].isnull()]
# Index labels of those rows (used below to drop them)
indexs = abb_pop_area.loc[abb_pop_area['area (sq. mi)'].isnull()].index
Int64Index([2448, 2449, 2450, 2451, 2452, 2453, 2454, 2455, 2456, 2457, 2458,
2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469,
2470, 2471, 2472, 2473, 2474, 2475, 2476, 2477, 2478, 2479, 2480,
2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, 2489, 2490, 2491,
2492, 2493, 2494, 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502,
2503, 2504, 2505, 2506, 2507, 2508, 2509, 2510, 2511, 2512, 2513,
2514, 2515, 2516, 2517, 2518, 2519, 2520, 2521, 2522, 2523, 2524,
2525, 2526, 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, 2535,
2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543],
dtype='int64')
# Remove the rows that have no area value.
abb_pop_area.drop(labels=indexs,axis=0,inplace=True)
# Total (all-ages) population of each state for 2010.
abb_pop_area.query('year == 2010 & ages == "total"')
| state | state/region | ages | year | population | area (sq. mi) |
---|
3 | Alabama | AL | total | 2010.0 | 4785570.0 | 52423.0 |
---|
91 | Alaska | AK | total | 2010.0 | 713868.0 | 656425.0 |
---|
101 | Arizona | AZ | total | 2010.0 | 6408790.0 | 114006.0 |
---|
189 | Arkansas | AR | total | 2010.0 | 2922280.0 | 53182.0 |
---|
197 | California | CA | total | 2010.0 | 37333601.0 | 163707.0 |
---|
283 | Colorado | CO | total | 2010.0 | 5048196.0 | 104100.0 |
---|
293 | Connecticut | CT | total | 2010.0 | 3579210.0 | 5544.0 |
---|
379 | Delaware | DE | total | 2010.0 | 899711.0 | 1954.0 |
---|
389 | District of Columbia | DC | total | 2010.0 | 605125.0 | 68.0 |
---|
475 | Florida | FL | total | 2010.0 | 18846054.0 | 65758.0 |
---|
485 | Georgia | GA | total | 2010.0 | 9713248.0 | 59441.0 |
---|
570 | Hawaii | HI | total | 2010.0 | 1363731.0 | 10932.0 |
---|
581 | Idaho | ID | total | 2010.0 | 1570718.0 | 83574.0 |
---|
666 | Illinois | IL | total | 2010.0 | 12839695.0 | 57918.0 |
---|
677 | Indiana | IN | total | 2010.0 | 6489965.0 | 36420.0 |
---|
762 | Iowa | IA | total | 2010.0 | 3050314.0 | 56276.0 |
---|
773 | Kansas | KS | total | 2010.0 | 2858910.0 | 82282.0 |
---|
858 | Kentucky | KY | total | 2010.0 | 4347698.0 | 40411.0 |
---|
869 | Louisiana | LA | total | 2010.0 | 4545392.0 | 51843.0 |
---|
954 | Maine | ME | total | 2010.0 | 1327366.0 | 35387.0 |
---|
965 | Montana | MT | total | 2010.0 | 990527.0 | 147046.0 |
---|
1050 | Nebraska | NE | total | 2010.0 | 1829838.0 | 77358.0 |
---|
1061 | Nevada | NV | total | 2010.0 | 2703230.0 | 110567.0 |
---|
1146 | New Hampshire | NH | total | 2010.0 | 1316614.0 | 9351.0 |
---|
1157 | New Jersey | NJ | total | 2010.0 | 8802707.0 | 8722.0 |
---|
1242 | New Mexico | NM | total | 2010.0 | 2064982.0 | 121593.0 |
---|
1253 | New York | NY | total | 2010.0 | 19398228.0 | 54475.0 |
---|
1338 | North Carolina | NC | total | 2010.0 | 9559533.0 | 53821.0 |
---|
1349 | North Dakota | ND | total | 2010.0 | 674344.0 | 70704.0 |
---|
1434 | Ohio | OH | total | 2010.0 | 11545435.0 | 44828.0 |
---|
1445 | Oklahoma | OK | total | 2010.0 | 3759263.0 | 69903.0 |
---|
1530 | Oregon | OR | total | 2010.0 | 3837208.0 | 98386.0 |
---|
1541 | Maryland | MD | total | 2010.0 | 5787193.0 | 12407.0 |
---|
1626 | Massachusetts | MA | total | 2010.0 | 6563263.0 | 10555.0 |
---|
1637 | Michigan | MI | total | 2010.0 | 9876149.0 | 96810.0 |
---|
1722 | Minnesota | MN | total | 2010.0 | 5310337.0 | 86943.0 |
---|
1733 | Mississippi | MS | total | 2010.0 | 2970047.0 | 48434.0 |
---|
1818 | Missouri | MO | total | 2010.0 | 5996063.0 | 69709.0 |
---|
1829 | Pennsylvania | PA | total | 2010.0 | 12710472.0 | 46058.0 |
---|
1914 | Rhode Island | RI | total | 2010.0 | 1052669.0 | 1545.0 |
---|
1925 | South Carolina | SC | total | 2010.0 | 4636361.0 | 32007.0 |
---|
2010 | South Dakota | SD | total | 2010.0 | 816211.0 | 77121.0 |
---|
2021 | Tennessee | TN | total | 2010.0 | 6356683.0 | 42146.0 |
---|
2106 | Texas | TX | total | 2010.0 | 25245178.0 | 268601.0 |
---|
2117 | Utah | UT | total | 2010.0 | 2774424.0 | 84904.0 |
---|
2202 | Vermont | VT | total | 2010.0 | 625793.0 | 9615.0 |
---|
2213 | Virginia | VA | total | 2010.0 | 8024417.0 | 42769.0 |
---|
2298 | Washington | WA | total | 2010.0 | 6742256.0 | 71303.0 |
---|
2309 | West Virginia | WV | total | 2010.0 | 1854146.0 | 24231.0 |
---|
2394 | Wisconsin | WI | total | 2010.0 | 5689060.0 | 65503.0 |
---|
2405 | Wyoming | WY | total | 2010.0 | 564222.0 | 97818.0 |
---|
# Compute population density — people per square mile — for every row,
# and attach it to the frame as a new 'midu' column.
midu = abb_pop_area['population'].div(abb_pop_area['area (sq. mi)'])
abb_pop_area['midu'] = midu
abb_pop_area.head()
| state | state/region | ages | year | population | area (sq. mi) | midu |
---|
0 | Alabama | AL | under18 | 2012.0 | 1117489.0 | 52423.0 | 21.316769 |
---|
1 | Alabama | AL | total | 2012.0 | 4817528.0 | 52423.0 | 91.897221 |
---|
2 | Alabama | AL | under18 | 2010.0 | 1130966.0 | 52423.0 | 21.573851 |
---|
3 | Alabama | AL | total | 2010.0 | 4785570.0 | 52423.0 | 91.287603 |
---|
4 | Alabama | AL | under18 | 2011.0 | 1125763.0 | 52423.0 | 21.474601 |
---|
Data analysis of 2012 US election donation project
import numpy as np
import pandas as pd

# Month-abbreviation -> month-number lookup, built from calendar order.
months = {
    abbr: num
    for num, abbr in enumerate(
        ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'],
        start=1,
    )
}

# Candidate -> party affiliation for the 2012 race.
# Republicans are listed once and expanded; the remaining candidates are
# added individually.
_republican_candidates = [
    'Bachmann, Michelle', 'Romney, Mitt', 'Pawlenty, Timothy',
    'Paul, Ron', 'Santorum, Rick', 'Cain, Herman', 'Gingrich, Newt',
    'McCotter, Thaddeus G', 'Huntsman, Jon', 'Perry, Rick',
]
parties = {name: 'Republican' for name in _republican_candidates}
parties['Obama, Barack'] = 'Democrat'
parties["Roemer, Charles E. 'Buddy' III"] = 'Reform'
parties['Johnson, Gary Earl'] = 'Libertarian'
Requirements:
- Load data
- View basic information of data
- Specify data interception, extract the data of the following fields, and discard other data
- cand_nm: name of candidate
- contbr_nm: name of donor
- contbr_st: donor state
- contbr_employer: the donor's employer
- contbr_occupation: donor occupation
- contb_receipt_amt: donation amount (USD)
- contb_receipt_dt: date of donation
- Summarize the new data and check whether there is missing data
- Use statistical indicators to quickly describe the summary of numerical attributes.
- Null value processing. Relevant fields may have blank values due to forgetting to fill in or confidentiality. Fill them with NOT PROVIDE
- Exception handling. Delete the data of donation amount < = 0
- Create a new column for the party of each candidate
- Check the different elements in the party column
- Count the occurrence times of each element in the party column
- Check the total contribution amount (contb_receipt_amt) received by each party
- Check the total contribution amount (contb_receipt_amt) received by each party per day
- Convert the date format in the table to 'yyyy-MM-dd'.
- See which candidate donors with the occupation DISABLED VETERAN mainly support
# Load the raw FEC donation file; low_memory=False disables chunked
# dtype inference, which this large mixed-type file would otherwise trip.
df = pd.read_csv('./data/usa_election.txt',low_memory=False)
# Keep only the fields of interest; discard every other column.
df = df[['cand_nm','contbr_nm','contbr_st','contbr_employer','contbr_occupation','contb_receipt_amt','contb_receipt_dt']]
df.head()
| cand_nm | contbr_nm | contbr_st | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt |
---|
0 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 250.0 | 20-JUN-11 |
---|
1 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 50.0 | 23-JUN-11 |
---|
2 | Bachmann, Michelle | SMITH, LANIER | AL | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 05-JUL-11 |
---|
3 | Bachmann, Michelle | BLEVINS, DARONDA | AR | NONE | RETIRED | 250.0 | 01-AUG-11 |
---|
4 | Bachmann, Michelle | WARDENBURG, HAROLD | AR | NONE | RETIRED | 300.0 | 20-JUN-11 |
---|
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536041 entries, 0 to 536040
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 cand_nm 536041 non-null object
1 contbr_nm 536041 non-null object
2 contbr_st 536040 non-null object
3 contbr_employer 525088 non-null object
4 contbr_occupation 530520 non-null object
5 contb_receipt_amt 536041 non-null float64
6 contb_receipt_dt 536041 non-null object
dtypes: float64(1), object(6)
memory usage: 28.6+ MB
# Null handling: fields may be blank (forgotten or confidential).
# Fill every missing value with the sentinel 'NOT PROVIDE'.
# Reassignment (rather than inplace=True) avoids pandas'
# SettingWithCopyWarning, since df was derived by column selection above.
df = df.fillna(value='NOT PROVIDE')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536041 entries, 0 to 536040
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 cand_nm 536041 non-null object
1 contbr_nm 536041 non-null object
2 contbr_st 536041 non-null object
3 contbr_employer 536041 non-null object
4 contbr_occupation 536041 non-null object
5 contb_receipt_amt 536041 non-null float64
6 contb_receipt_dt 536041 non-null object
dtypes: float64(1), object(6)
memory usage: 28.6+ MB
# Outlier handling: donations with amount <= 0 (refunds/corrections).
# Count how many such rows exist before removing them.
(df['contb_receipt_amt'] <= 0).sum()
5727
# Keep only strictly positive donations (equivalent to the original
# double negation ~(amt <= 0), since the amount column has no nulls).
# .copy() detaches df from the sliced view so the column assignment
# below cannot raise SettingWithCopyWarning.
df = df.loc[df['contb_receipt_amt'] > 0].copy()
# Attach each candidate's party affiliation as a new 'party' column.
df['party'] = df['cand_nm'].map(parties)
df.head()
| cand_nm | contbr_nm | contbr_st | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | party |
---|
0 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 250.0 | 20-JUN-11 | Republican |
---|
1 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 50.0 | 23-JUN-11 | Republican |
---|
2 | Bachmann, Michelle | SMITH, LANIER | AL | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 05-JUL-11 | Republican |
---|
3 | Bachmann, Michelle | BLEVINS, DARONDA | AR | NONE | RETIRED | 250.0 | 01-AUG-11 | Republican |
---|
4 | Bachmann, Michelle | WARDENBURG, HAROLD | AR | NONE | RETIRED | 300.0 | 20-JUN-11 | Republican |
---|
# Distinct party values present in the data.
df['party'].unique()
array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)
# Display the filtered frame with the new 'party' column.
df
| cand_nm | contbr_nm | contbr_st | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | party |
---|
0 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 250.0 | 20-JUN-11 | Republican |
---|
1 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 50.0 | 23-JUN-11 | Republican |
---|
2 | Bachmann, Michelle | SMITH, LANIER | AL | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 05-JUL-11 | Republican |
---|
3 | Bachmann, Michelle | BLEVINS, DARONDA | AR | NONE | RETIRED | 250.0 | 01-AUG-11 | Republican |
---|
4 | Bachmann, Michelle | WARDENBURG, HAROLD | AR | NONE | RETIRED | 300.0 | 20-JUN-11 | Republican |
---|
... | ... | ... | ... | ... | ... | ... | ... | ... |
---|
536036 | Perry, Rick | ANDERSON, MARILEE MRS. | XX | INFORMATION REQUESTED PER BEST EFFORTS | INFORMATION REQUESTED PER BEST EFFORTS | 2500.0 | 31-AUG-11 | Republican |
---|
536037 | Perry, Rick | TOLBERT, DARYL MR. | XX | T.A.C.C. | LONGWALL MAINTENANCE FOREMAN | 500.0 | 30-SEP-11 | Republican |
---|
536038 | Perry, Rick | GRANE, BRYAN F. MR. | XX | INFORMATION REQUESTED PER BEST EFFORTS | INFORMATION REQUESTED PER BEST EFFORTS | 500.0 | 29-SEP-11 | Republican |
---|
536039 | Perry, Rick | DUFFY, DAVID A. MR. | XX | DUFFY EQUIPMENT COMPANY INC. | BUSINESS OWNER | 2500.0 | 30-SEP-11 | Republican |
---|
536040 | Perry, Rick | GORMAN, CHRIS D. MR. | XX | INFORMATION REQUESTED PER BEST EFFORTS | INFORMATION REQUESTED PER BEST EFFORTS | 5000.0 | 29-SEP-11 | Republican |
---|
530314 rows × 8 columns
# Number of donation records per party.
df['party'].value_counts()
Democrat 289999
Republican 234300
Reform 5313
Libertarian 702
Name: party, dtype: int64
# Total contribution amount (contb_receipt_amt) received by each party.
df.groupby(by='party')['contb_receipt_amt'].sum()
party
Democrat 8.259441e+07
Libertarian 4.132769e+05
Reform 3.429658e+05
Republican 1.251181e+08
Name: contb_receipt_amt, dtype: float64
# Total contribution amount received by each party on each day.
# NOTE(review): the date column is still a 'DD-MON-YY' string here, so the
# displayed group order is lexicographic, not chronological.
df.groupby(by=['contb_receipt_dt','party'])['contb_receipt_amt'].sum()
contb_receipt_dt party
01-APR-11 Reform 50.00
Republican 12635.00
01-AUG-11 Democrat 182198.00
Libertarian 1000.00
Reform 1847.00
...
31-MAY-11 Republican 313839.80
31-OCT-11 Democrat 216971.87
Libertarian 4250.00
Reform 3205.00
Republican 751542.36
Name: contb_receipt_amt, Length: 1183, dtype: float64
# Convert the date column's 'DD-MON-YY' strings to ISO 'yyyy-MM-dd'.
def transform_date(d):
    """Return *d* (e.g. '20-JUN-11') reformatted as 'YYYY-MM-DD'.

    The previous implementation emitted unpadded months ('2011-6-20'),
    which violates the required yyyy-MM-dd format and breaks
    lexicographic date ordering; strftime zero-pads both fields.
    """
    from datetime import datetime  # local import: leaves the script's top-level imports untouched
    return datetime.strptime(d, '%d-%b-%y').strftime('%Y-%m-%d')
# Apply the converter to every value of the date column.
df['contb_receipt_dt'] = df['contb_receipt_dt'].map(transform_date)
df.head()
| cand_nm | contbr_nm | contbr_st | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | party |
---|
0 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 250.0 | 2011-6-20 | Republican |
---|
1 | Bachmann, Michelle | HARVEY, WILLIAM | AL | RETIRED | RETIRED | 50.0 | 2011-6-23 | Republican |
---|
2 | Bachmann, Michelle | SMITH, LANIER | AL | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 2011-7-05 | Republican |
---|
3 | Bachmann, Michelle | BLEVINS, DARONDA | AR | NONE | RETIRED | 250.0 | 2011-8-01 | Republican |
---|
4 | Bachmann, Michelle | WARDENBURG, HAROLD | AR | NONE | RETIRED | 300.0 | 2011-6-20 | Republican |
---|
# See which candidates disabled veterans mainly support.
# Boolean mask: rows whose donor occupation is DISABLED VETERAN.
df['contbr_occupation'] == 'DISABLED VETERAN'
old_bing_df = df.loc[df['contbr_occupation'] == 'DISABLED VETERAN']
old_bing_df.head()
| cand_nm | contbr_nm | contbr_st | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | party |
---|
149790 | Obama, Barack | MAHURIN, DAVID | FL | VETERANS ADMINISTRATION | DISABLED VETERAN | 10.0 | 2012-1-17 | Democrat |
---|
150910 | Obama, Barack | MAHURIN, DAVID | FL | VETERANS ADMINISTRATION | DISABLED VETERAN | 20.0 | 2012-1-01 | Democrat |
---|
174041 | Obama, Barack | KRUCHTEN, MICHAEL | IL | DISABLED | DISABLED VETERAN | 50.0 | 2011-12-02 | Democrat |
---|
175244 | Obama, Barack | KRUCHTEN, MICHAEL | IL | DISABLED | DISABLED VETERAN | 250.0 | 2011-10-12 | Democrat |
---|
183790 | Obama, Barack | BRYANT, J.L. | KS | RET ARMY | DISABLED VETERAN | 100.0 | 2011-10-12 | Democrat |
---|
# Total donation amount given to each candidate by disabled veterans.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
cand_nm
Cain, Herman 300.00
Obama, Barack 4205.00
Paul, Ron 2425.49
Santorum, Rick 250.00
Name: contb_receipt_amt, dtype: float64