3
3
title: Publications
4
4
---
5
5
< section class ="section-margin ">
6
- < div class ="container ">
6
+ < div class ="container ">
7
7
< h2 id ="publications "> 2025</ h2 >
8
8
< ul class ="publications ">
9
9
< li >
10
- < a target ="_blank " href ="paper/traincheck-osdi25-preprint.pdf "> Training with Confidence: Catching Silent Errors in Deep Learning Training with Automated Proactive Checks</ a > < br >
10
+ < a target ="_blank " href ="# "> Verifying Distributed Deep Learning Training via Parallelization Equivalence</ a > < br >
11
+ < span class ="authorlist "> < i > < a href ="https://mercury-browser-ede.notion.site/yunchi " class ="nodec "> Yunchi Lu</ a > , </ i > < i > < a href ="https://naizhengtan.github.io " class ="nodec "> Cheng Tan</ a > , </ i > < i > < a href ="https://www.microsoft.com/en-us/research/people/yomia " class ="nodec "> Youshan Miao</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > , </ i > < i > < a href ="https://www.microsoft.com/en-us/research/people/yizhu1 " class ="nodec "> Yi Zhu</ a > , </ i > < i > < a href ="https://www.microsoft.com/en-us/research/people/zhxian " class ="nodec "> Xian Zhang</ a > , </ i > < i > < a href ="https://www.microsoft.com/en-us/research/people/fanyang " class ="nodec "> Fan Yang</ a > < br > </ i > </ span >
12
+ < a target ="_blank " href ="https://sigops.org/s/conferences/sosp/2025/ " class ="conf "> < b > SOSP 2025</ b > </ a >
13
+ </ li >
14
+ < li >
15
+ < a target ="_blank " href ="# "> Optimistic Recovery for High-Availability Software via Partial Process State Preservation</ a > < br >
16
+ < span class ="authorlist "> < i > < a href ="https://osdi.dev " class ="nodec "> Yuzhuo Jing</ a > , </ i > < i > Yuqi Mai, </ i > < i > Angting Cai, </ i > < i > < a href ="https://chenyi.world " class ="nodec "> Yi Chen</ a > , </ i > < i > < a href ="https://hwanning.netlify.app " class ="nodec "> Wanning He</ a > , </ i > < i > Xiaoyang Qian, </ i > < i > < a href ="https://web.eecs.umich.edu/~pmchen " class ="nodec "> Peter M. Chen</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
17
+ < a target ="_blank " href ="https://sigops.org/s/conferences/sosp/2025/ " class ="conf "> < b > SOSP 2025</ b > </ a >
18
+ </ li >
19
+ < li >
20
+ < a target ="_blank " href ="# "> Mitigating Application Resource Overload with Targeted Task Cancellation</ a > < br >
21
+ < span class ="authorlist "> < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > Zeyin Zhang, </ i > < i > Yicheng Liu, </ i > < i > Yile Gu, </ i > < i > Shuangyu Lei, </ i > < i > < a href ="https://homes.cs.washington.edu/~baris " class ="nodec "> Baris Kasikci</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
22
+ < a target ="_blank " href ="https://sigops.org/s/conferences/sosp/2025/ " class ="conf "> < b > SOSP 2025</ b > </ a >
23
+ </ li >
24
+ < li >
25
+ < a target ="_blank " href ="paper/traincheck-osdi25-preprint.pdf "> Training with Confidence: Catching Silent Errors in Deep Learning Training with Automated Proactive Checks</ a > < br >
11
26
< span class ="authorlist "> < i > < a href ="https://essoz.github.io " class ="nodec "> Yuxuan Jiang</ a > , </ i > < i > Ziming Zhou, </ i > < i > Boyu Xu, </ i > < i > Beijie Liu, </ i > < i > Runhui Xu, </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
12
- < a target ="_blank " href ="https://www.usenix.org/conference/osdi25 " class ="conf "> < b > OSDI 2025</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/traincheck-osdi25.bib "> BibTeX</ a >
13
- < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/TrainCheck "> Software</ a >
27
+ < a target ="_blank " href ="https://www.usenix.org/conference/osdi25 " class ="conf "> < b > OSDI 2025</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/traincheck-osdi25.bib "> BibTeX</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/TrainCheck "> Software</ a > < a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="https://www.arxiv.org/abs/2506.14813 "> [ArXiv]</ a >
14
28
</ li >
15
29
< li >
16
- < a target ="_blank " href ="# "> Deriving Semantic Checkers from Tests to Detect Silent Failures in Production Distributed Systems</ a > < br >
30
+ < a target ="_blank " href ="paper/t2c-osdi25-preprint.pdf "> Deriving Semantic Checkers from Tests to Detect Silent Failures in Production Distributed Systems</ a > < br >
17
31
< span class ="authorlist "> < i > < a href ="https://www.cs.jhu.edu/~chlou/about " class ="nodec "> Chang Lou</ a > , </ i > < i > Dimas Shidqi Parikesit, </ i > < i > Yujin Huang, </ i > < i > Zhewen Yang, </ i > < i > Senapati Diwangkara, </ i > < i > < a href ="https://osdi.dev " class ="nodec "> Yuzhuo Jing</ a > , </ i > < i > Achmad Imam Kistijantoro, </ i > < i > < a href ="http://www.eecg.toronto.edu/~yuan " class ="nodec "> Ding Yuan</ a > , </ i > < i > < a href ="https://www.microsoft.com/en-us/research/people/sumann " class ="nodec "> Suman Nath</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
18
- < a target ="_blank " href ="https://www.usenix.org/conference/osdi25 " class ="conf "> < b > OSDI 2025</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/T2C "> Software</ a >
32
+ < a target ="_blank " href ="https://www.usenix.org/conference/osdi25 " class ="conf "> < b > OSDI 2025</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/t2c-osdi25.bib "> BibTeX</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/T2C "> Software</ a >
19
33
</ li >
20
34
< li >
21
35
< a target ="_blank " href ="paper/xinda-nsdi25-preprint.pdf "> One-Size-Fits-None: Understanding and Enhancing Slow-Fault Tolerance in Modern Distributed Systems</ a > < br >
@@ -50,13 +64,13 @@ <h2 id="publications">2023</h2>
50
64
</ li >
51
65
< li >
52
66
< a target ="_blank " href ="paper/pbox-sosp23.pdf "> Pushing Performance Isolation Boundaries into Application with pBox</ a > < br >
53
- < span class ="authorlist "> < i > < a href ="https://www.cs.jhu.edu/~yigonghu " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://gongqihuang.com " class ="nodec "> Gongqi Huang</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
67
+ < span class ="authorlist "> < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://gongqihuang.com " class ="nodec "> Gongqi Huang</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
54
68
< a target ="_blank " href ="https://sosp2023.mpi-sws.org " class ="conf "> < b > SOSP 2023</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/pbox-sosp23.bib "> BibTeX</ a >
55
69
< a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="slides/pbox_sosp23_slides.pdf "> Slides</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/pBox "> Software</ a >
56
70
</ li >
57
71
< li >
58
72
< a target ="_blank " href ="paper/vprof-eurosys23.pdf "> Effective Performance Issue Diagnosis with Value-Assisted Cost Profiling</ a > < br >
59
- < span class ="authorlist "> < i > Lingmei Weng, </ i > < i > < a href ="https://www.cs.jhu.edu/~yigonghu " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > , </ i > < i > < a href ="http://www.cs.columbia.edu/~nieh " class ="nodec "> Jason Nieh</ a > , </ i > < i > < a href ="http://www.cs.columbia.edu/~junfeng " class ="nodec "> Junfeng Yang</ a > < br > </ i > </ span >
73
+ < span class ="authorlist "> < i > Lingmei Weng, </ i > < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > , </ i > < i > < a href ="http://www.cs.columbia.edu/~nieh " class ="nodec "> Jason Nieh</ a > , </ i > < i > < a href ="http://www.cs.columbia.edu/~junfeng " class ="nodec "> Junfeng Yang</ a > < br > </ i > </ span >
60
74
< a target ="_blank " href ="https://2023.eurosys.org " class ="conf "> < b > EuroSys 2023</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/vprof-eurosys23.bib "> BibTeX</ a >
61
75
< a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="slides/vprof_eurosys23_slides.pdf "> Slides</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/wenglingmei/vprofAE "> Software</ a >
62
76
</ li >
@@ -104,7 +118,7 @@ <h2 id="publications">2020</h2>
104
118
< ul class ="publications ">
105
119
< li >
106
120
< a target ="_blank " href ="paper/violet-osdi20.pdf "> Automated Reasoning and Detection of Specious Configuration in Large Systems with Symbolic Execution</ a > < br >
107
- < span class ="authorlist "> < i > < a href ="https://www.cs.jhu.edu/~yigonghu " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://gongqihuang.com " class ="nodec "> Gongqi Huang</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
121
+ < span class ="authorlist "> < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://gongqihuang.com " class ="nodec "> Gongqi Huang</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
108
122
< a target ="_blank " href ="https://www.usenix.org/conference/osdi20 " class ="conf "> < b > OSDI 2020</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/violet-osdi20.bib "> BibTeX</ a >
109
123
< a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="slides/violet_osdi20_slides.pdf "> Slides</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://github.com/OrderLab/violet "> Software</ a > < a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="paper/violet-tech-report.pdf "> TechReport</ a >
110
124
</ li >
@@ -126,7 +140,7 @@ <h2 id="publications">2020</h2>
126
140
</ li >
127
141
< li >
128
142
< a target ="_blank " href ="paper/sdig-aaai20-workshop.pdf "> Scaling Performance Issue Detection and Diagnosis in Cloud Infrastructures</ a > < br >
129
- < span class ="authorlist "> < i > < a href ="https://www.cs.jhu.edu/~yigonghu " class ="nodec "> Yigong Hu</ a > , </ i > < i > Ze Li, </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > , </ i > < i > Suhas Pinnamaneni, </ i > < i > Francis David, </ i > < i > Yingnong Dang, </ i > < i > Murali Chintalapati< br > </ i > </ span >
143
+ < span class ="authorlist "> < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > Ze Li, </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > , </ i > < i > Suhas Pinnamaneni, </ i > < i > Francis David, </ i > < i > Yingnong Dang, </ i > < i > Murali Chintalapati< br > </ i > </ span >
130
144
< a target ="_blank " href ="https://cloudintelligenceworkshop.org " class ="conf "> < b > AAAI-20 Workshop on Cloud Intelligence</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/sdig-aaai20.bib "> BibTeX</ a >
131
145
</ li >
132
146
@@ -146,7 +160,7 @@ <h2 id="publications">2019</h2>
146
160
</ li >
147
161
< li >
148
162
< a target ="_blank " href ="paper/leaseos-asplos19.pdf "> A Case for Lease-Based, Utilitarian Resource Management on Mobile Devices</ a > < b style ="color:green "> [Best Paper Award]</ b > < br >
149
- < span class ="authorlist "> < i > < a href ="https://www.cs.jhu.edu/~yigonghu " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://sylll.github.io " class ="nodec "> Suyi Liu</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
163
+ < span class ="authorlist "> < i > < a href ="https://yigonghu.github.io " class ="nodec "> Yigong Hu</ a > , </ i > < i > < a href ="https://sylll.github.io " class ="nodec "> Suyi Liu</ a > , </ i > < i > < a href ="https://web.eecs.umich.edu/~ryanph " class ="nodec "> Peng Huang</ a > < br > </ i > </ span >
150
164
< a target ="_blank " href ="https://asplos-conference.org " class ="conf "> < b > ASPLOS 2019</ b > </ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="paper/leaseos.bib "> BibTeX</ a >
151
165
< a target ="_blank " role ="button " class ="btn btn-outline-primary publinkitem " href ="slides/leaseos_asplos19_slides.pptx "> Slides</ a > < a target ="_blank " class ="btn btn-outline-primary publinkitem " href ="https://orderlab.io/LeaseOS "> Software</ a > < br > < div class ="press "> < b > Coverage:</ b > < a target ="_blank " href ="https://blog.acolyer.org/2019/05/31/lease-os "> The Morning Paper</ a > </ div >
152
166
</ li >
@@ -280,5 +294,5 @@ <h2 id="publications">2010</h2>
280
294
</ li >
281
295
282
296
</ ul >
283
- </ div >
297
+ </ div >
284
298
</ section >
0 commit comments