Research Scientist, Fiksu Inc.
About Me
I am particularly interested in applying big data to solve difficult real-world problems. My current research focuses on the design of large-scale storage systems, including distributed file systems and databases, to support big data applications. Previously, I explored low-level operating system support for contributory applications: applications, such as Folding@Home and the Great Internet Mersenne Prime Search, that allow users to contribute computing resources to projects from which they do not directly benefit.
Academic Background
I'm a sixth-year PhD student in the Computer Science Department at Carnegie Mellon University, advised by Greg Ganger.
I hold a Master's degree from the University of Massachusetts at Amherst, where I was co-advised by Mark Corner and Emery Berger. My Bachelor of Science degree, a dual degree in Computer Science and Mathematics, is also from UMass.
Awards and Fellowships
- APC Fellowship and Award in Data Center Efficiency Research, 2009
- Best Paper Award, TFS: A Transparent File System for Contributory Storage, FAST, 2007
- Best Student Project, VMware, Cambridge, MA, Presented at VMWorld, 2007
- Best Undergraduate Research in Computer Science, University of Massachusetts, 2005
Research
My current research focuses on exploiting staleness tolerance in system design. To that end, I am working closely with a number of machine learning researchers to demonstrate how the error tolerance of ML algorithms -- staleness tolerance in particular -- can be used in large-scale parallel machine learning.
Additionally, I have worked with HP Labs on LazyBase, a distributed database designed for high-throughput updates and inserts that still allows low-latency analytical queries. It does this by exploiting a tradeoff between query result freshness and query latency.
Along with Alexey Tumanov, I am working on AlSched, a system that allows cluster frameworks to specify resource requests as composable algebraic utility functions. The scheduler then optimizes over these functions to create a globally optimal resource assignment.
Papers
@inproceedings{lazytables-hotos2013,
  author    = {Cipar, James and Ho, Qirong and Kim, Jin Kyu and Lee, Seunghak
               and Ganger, Gregory R. and Gibson, Garth and Keeton, Kimberly
               and Xing, Eric},
  title     = {Solving the straggler problem with bounded staleness},
  booktitle = {Proceedings of the 14th {USENIX} Workshop on Hot Topics in
               Operating Systems},
  series    = {HotOS '13},
  year      = {2013},
  location  = {Santa Ana Pueblo, NM},
  publisher = {USENIX Association},
}
in heterogeneous clouds
@inproceedings{alsched-socc12,
  author    = {Tumanov, Alexey and Cipar, James and Kozuch, Michael A.
               and Ganger, Gregory R.},
  title     = {{alsched}: algebraic scheduling of mixed workloads in heterogeneous clouds},
  booktitle = {Proceedings of the 3rd {ACM} Symposium on Cloud Computing},
  series    = {SoCC '12},
  year      = {2012},
  location  = {San Jose, CA},
  publisher = {ACM},
}
@inproceedings{Cipar:2012:LTF:2168836.2168854,
  author    = {Cipar, James and Ganger, Greg and Keeton, Kimberly and
               Morrey, III, Charles B. and Soules, Craig A. N. and Veitch, Alistair},
  title     = {{LazyBase}: trading freshness for performance in a scalable database},
  booktitle = {Proceedings of the 7th {ACM} {European} Conference on Computer Systems},
  series    = {EuroSys '12},
  year      = {2012},
  isbn      = {978-1-4503-1223-3},
  location  = {Bern, Switzerland},
  pages     = {169--182},
  numpages  = {14},
  url       = {http://doi.acm.org/10.1145/2168836.2168854},
  doi       = {10.1145/2168836.2168854},
  acmid     = {2168854},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {consistency, freshness, pipeline},
}
@inproceedings{Amur:2010:RFP:1807128.1807164,
  author    = {Amur, Hrishikesh and
               Cipar, James and
               Gupta, Varun and
               Ganger, Gregory R. and
               Kozuch, Michael A. and
               Schwan, Karsten},
  title     = {Robust and flexible power-proportional storage},
  booktitle = {Proceedings of the 1st ACM symposium on Cloud computing},
  series    = {SoCC '10},
  location  = {Indianapolis, Indiana, USA},
  year      = {2010},
  pages     = {217--228},
  numpages  = {12},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0036-0},
  doi       = {10.1145/1807128.1807164},
  url       = {http://doi.acm.org/10.1145/1807128.1807164},
  acmid     = {1807164},
  keywords  = {cluster computing, data-layout, power-proportionality},
}
@inproceedings{Kozuch:2009:TLC:1555271.1555282,
  author    = {Kozuch, Michael A. and Ryan, Michael P. and Gass, Richard
               and Schlosser, Steven W. and O'Hallaron, David and Cipar, James
               and Krevat, Elie and L{\'o}pez, Julio and Stroucken, Michael
               and Ganger, Gregory R.},
  title     = {{Tashi}: location-aware cluster management},
  booktitle = {Proceedings of the 1st workshop on Automated control for datacenters and clouds},
  series    = {ACDC '09},
  year      = {2009},
  isbn      = {978-1-60558-585-7},
  location  = {Barcelona, Spain},
  pages     = {43--48},
  numpages  = {6},
  url       = {http://doi.acm.org/10.1145/1555271.1555282},
  doi       = {10.1145/1555271.1555282},
  acmid     = {1555282},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {cloud computing, cluster management, virtualization},
}
Best paper award
@inproceedings{Cipar:2007:TTF:1267903.1267931,
  author    = {Cipar, James and
               Corner, Mark D. and
               Berger, Emery D.},
  title     = {{TFS}: a transparent file system for contributory storage},
  booktitle = {Proceedings of the 5th USENIX conference on File and Storage Technologies},
  series    = {FAST '07},
  location  = {San Jose, CA},
  year      = {2007},
  pages     = {28--28},
  numpages  = {1},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  url       = {http://dl.acm.org/citation.cfm?id=1267903.1267931},
  acmid     = {1267931},
}
@inproceedings{Cipar:2006:TCM:1267359.1267370,
  author    = {Cipar, James and Corner, Mark D. and Berger, Emery D.},
  title     = {Transparent contribution of memory},
  booktitle = {Proceedings of the 2006 {USENIX} Annual Technical Conference},
  series    = {ATEC '06},
  year      = {2006},
  location  = {Boston, MA},
  pages     = {11--11},
  numpages  = {1},
  url       = {http://dl.acm.org/citation.cfm?id=1267359.1267370},
  acmid     = {1267370},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
}